[ie/youtube] Player client maintenance (#10573)
[yt-dlp3.git] / yt_dlp / extractor / youtube.py
blob1a3e286c6861bafb21534dfb1ef32b7ec4f78d2b
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime as dt
6 import enum
7 import functools
8 import hashlib
9 import itertools
10 import json
11 import math
12 import os.path
13 import random
14 import re
15 import shlex
16 import sys
17 import threading
18 import time
19 import traceback
20 import urllib.parse
22 from .common import InfoExtractor, SearchInfoExtractor
23 from .openload import PhantomJSwrapper
24 from ..jsinterp import JSInterpreter
25 from ..networking.exceptions import HTTPError, network_exceptions
26 from ..utils import (
27 NO_DEFAULT,
28 ExtractorError,
29 LazyList,
30 UserNotLive,
31 bug_reports_message,
32 classproperty,
33 clean_html,
34 datetime_from_str,
35 dict_get,
36 filesize_from_tbr,
37 filter_dict,
38 float_or_none,
39 format_field,
40 get_first,
41 int_or_none,
42 is_html,
43 join_nonempty,
44 js_to_json,
45 mimetype2ext,
46 orderedSet,
47 parse_codecs,
48 parse_count,
49 parse_duration,
50 parse_iso8601,
51 parse_qs,
52 qualities,
53 remove_start,
54 smuggle_url,
55 str_or_none,
56 str_to_int,
57 strftime_or_none,
58 traverse_obj,
59 try_call,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unified_timestamp,
64 unsmuggle_url,
65 update_url_query,
66 url_or_none,
67 urljoin,
68 variadic,
# Key under which the originating client name is stored alongside streaming data
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
# Per-client Innertube configuration, keyed by the client name.
# Each entry carries the request context ('INNERTUBE_CONTEXT') and the numeric
# client id ('INNERTUBE_CONTEXT_CLIENT_NAME'); optional keys such as
# 'INNERTUBE_HOST', 'REQUIRE_JS_PLAYER' and 'PLAYER_PARAMS' are given defaults
# by build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
    'web_safari': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
                'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20240723.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20240724.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20240723.03.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '19.29.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '7.11.50',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '24.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # YouTube Kids videos aren't returned on this client for some reason
    'android_vr': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_VR',
                'clientVersion': '1.57.29',
                'deviceMake': 'Oculus',
                'deviceModel': 'Quest 3',
                'androidSdkVersion': 32,
                'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
                'osName': 'Android',
                'osVersion': '12L',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_testsuite': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_TESTSUITE',
                'clientVersion': '1.9',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 30,
        'REQUIRE_JS_PLAYER': False,
        'PLAYER_PARAMS': '2AMB',
    },
    # This client only has legacy formats and storyboards
    'android_producer': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_PRODUCER',
                'clientVersion': '0.111.1',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 91,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '19.29.1',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '7.08.2',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '24.30.100',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20240726.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    'tv': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5',
                'clientVersion': '7.20240724.13.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
    # This client has pre-merged video+audio 720p/1080p streams
    'mediaconnect': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MEDIA_CONNECT_FRONTEND',
                'clientVersion': '0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
        'REQUIRE_JS_PLAYER': False,
    },
}
def _split_innertube_client(client_name):
    """Split a client name into a ``(name, base, variant)`` triple.

    A dotted name ``'<variant>.<base>'`` yields ``(variant, base, variant)``.
    Otherwise the name is cut at its first underscore and the result is
    ``(client_name, base, variant)``, with ``variant`` being None when the
    name contains no underscore.
    """
    head, dot, tail = client_name.rpartition('.')
    if dot:
        return head, tail, head

    base, underscore, variant = client_name.partition('_')
    return client_name, base, (variant if underscore else None)
def short_client_name(client_name):
    """Build an upper-case abbreviation of a client name.

    Takes the first four characters of the leading underscore-separated
    segment and the initial of every following segment, combined through
    join_nonempty().
    """
    full_name = _split_innertube_client(client_name)[0]
    segments = full_name.split('_')
    initials = ''.join(segment[0] for segment in segments[1:])
    return join_nonempty(segments[0][:4], initials).upper()
def build_innertube_clients():
    """Fill in defaults and a ``priority`` for every entry of INNERTUBE_CLIENTS (in place).

    The priority is 10 times the rank of the base client, lowered by 2 for an
    'embedded' variant and by 3 for any other variant.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'web', 'tv', 'mweb', 'android')
    rank = qualities(base_clients[::-1])

    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in (
            ('INNERTUBE_HOST', 'www.youtube.com'),
            ('REQUIRE_JS_PLAYER', True),
            ('PLAYER_PARAMS', None),
        ):
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(name)
        client_priority = 10 * rank(base_client)

        if variant == 'embedded':
            # The same dict object is attached to every embedded client
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            client_priority -= 2
        elif variant:
            client_priority -= 3
        cfg['priority'] = client_priority
# Populate defaults and priorities in INNERTUBE_CLIENTS once, when the module is loaded
build_innertube_clients()
@enum.unique
class BadgeType(enum.Enum):
    """Badge kinds recognised on YouTube renderers."""

    # Availability of the item
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Other badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
352 class YoutubeBaseInfoExtractor(InfoExtractor):
353 """Provide base functions for Youtube extractors"""
355 _RESERVED_NAMES = (
356 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
357 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
358 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
359 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
361 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
363 # _NETRC_MACHINE = 'youtube'
365 # If True it will raise an error if no login info is provided
366 _LOGIN_REQUIRED = False
368 _INVIDIOUS_SITES = (
369 # invidious-redirect websites
370 r'(?:www\.)?redirect\.invidious\.io',
371 r'(?:(?:www|dev)\.)?invidio\.us',
372 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
373 r'(?:www\.)?invidious\.pussthecat\.org',
374 r'(?:www\.)?invidious\.zee\.li',
375 r'(?:www\.)?invidious\.ethibox\.fr',
376 r'(?:www\.)?iv\.ggtyler\.dev',
377 r'(?:www\.)?inv\.vern\.i2p',
378 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
379 r'(?:www\.)?inv\.riverside\.rocks',
380 r'(?:www\.)?invidious\.silur\.me',
381 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
382 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
383 r'(?:www\.)?invidious\.slipfox\.xyz',
384 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
385 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
386 r'(?:www\.)?invidious\.tiekoetter\.com',
387 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
388 r'(?:www\.)?invidious\.nerdvpn\.de',
389 r'(?:www\.)?invidious\.weblibre\.org',
390 r'(?:www\.)?inv\.odyssey346\.dev',
391 r'(?:www\.)?invidious\.dhusch\.de',
392 r'(?:www\.)?iv\.melmac\.space',
393 r'(?:www\.)?watch\.thekitty\.zone',
394 r'(?:www\.)?invidious\.privacydev\.net',
395 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
396 r'(?:www\.)?invidious\.drivet\.xyz',
397 r'(?:www\.)?vid\.priv\.au',
398 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
399 r'(?:www\.)?inv\.vern\.cc',
400 r'(?:www\.)?invidious\.esmailelbob\.xyz',
401 r'(?:www\.)?invidious\.sethforprivacy\.com',
402 r'(?:www\.)?yt\.oelrichsgarcia\.de',
403 r'(?:www\.)?yt\.artemislena\.eu',
404 r'(?:www\.)?invidious\.flokinet\.to',
405 r'(?:www\.)?invidious\.baczek\.me',
406 r'(?:www\.)?y\.com\.sb',
407 r'(?:www\.)?invidious\.epicsite\.xyz',
408 r'(?:www\.)?invidious\.lidarshield\.cloud',
409 r'(?:www\.)?yt\.funami\.tech',
410 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
411 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
412 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
413 # youtube-dl invidious instances list
414 r'(?:(?:www|no)\.)?invidiou\.sh',
415 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
416 r'(?:www\.)?invidious\.kabi\.tk',
417 r'(?:www\.)?invidious\.mastodon\.host',
418 r'(?:www\.)?invidious\.zapashcanon\.fr',
419 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
420 r'(?:www\.)?invidious\.tinfoil-hat\.net',
421 r'(?:www\.)?invidious\.himiko\.cloud',
422 r'(?:www\.)?invidious\.reallyancient\.tech',
423 r'(?:www\.)?invidious\.tube',
424 r'(?:www\.)?invidiou\.site',
425 r'(?:www\.)?invidious\.site',
426 r'(?:www\.)?invidious\.xyz',
427 r'(?:www\.)?invidious\.nixnet\.xyz',
428 r'(?:www\.)?invidious\.048596\.xyz',
429 r'(?:www\.)?invidious\.drycat\.fr',
430 r'(?:www\.)?inv\.skyn3t\.in',
431 r'(?:www\.)?tube\.poal\.co',
432 r'(?:www\.)?tube\.connect\.cafe',
433 r'(?:www\.)?vid\.wxzm\.sx',
434 r'(?:www\.)?vid\.mint\.lgbt',
435 r'(?:www\.)?vid\.puffyan\.us',
436 r'(?:www\.)?yewtu\.be',
437 r'(?:www\.)?yt\.elukerio\.org',
438 r'(?:www\.)?yt\.lelux\.fi',
439 r'(?:www\.)?invidious\.ggc-project\.de',
440 r'(?:www\.)?yt\.maisputain\.ovh',
441 r'(?:www\.)?ytprivate\.com',
442 r'(?:www\.)?invidious\.13ad\.de',
443 r'(?:www\.)?invidious\.toot\.koeln',
444 r'(?:www\.)?invidious\.fdn\.fr',
445 r'(?:www\.)?watch\.nettohikari\.com',
446 r'(?:www\.)?invidious\.namazso\.eu',
447 r'(?:www\.)?invidious\.silkky\.cloud',
448 r'(?:www\.)?invidious\.exonip\.de',
449 r'(?:www\.)?invidious\.riverside\.rocks',
450 r'(?:www\.)?invidious\.blamefran\.net',
451 r'(?:www\.)?invidious\.moomoo\.de',
452 r'(?:www\.)?ytb\.trom\.tf',
453 r'(?:www\.)?yt\.cyberhost\.uk',
454 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
455 r'(?:www\.)?qklhadlycap4cnod\.onion',
456 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
457 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
458 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
459 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
460 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
461 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
462 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
463 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
464 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
465 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
466 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
467 r'(?:www\.)?piped\.kavin\.rocks',
468 r'(?:www\.)?piped\.tokhmi\.xyz',
469 r'(?:www\.)?piped\.syncpundit\.io',
470 r'(?:www\.)?piped\.mha\.fi',
471 r'(?:www\.)?watch\.whatever\.social',
472 r'(?:www\.)?piped\.garudalinux\.org',
473 r'(?:www\.)?piped\.rivo\.lol',
474 r'(?:www\.)?piped-libre\.kavin\.rocks',
475 r'(?:www\.)?yt\.jae\.fi',
476 r'(?:www\.)?piped\.mint\.lgbt',
477 r'(?:www\.)?il\.ax',
478 r'(?:www\.)?piped\.esmailelbob\.xyz',
479 r'(?:www\.)?piped\.projectsegfau\.lt',
480 r'(?:www\.)?piped\.privacydev\.net',
481 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
482 r'(?:www\.)?piped\.smnz\.de',
483 r'(?:www\.)?piped\.adminforge\.de',
484 r'(?:www\.)?watch\.whatevertinfoil\.de',
485 r'(?:www\.)?piped\.qdi\.fi',
486 r'(?:(?:www|cf)\.)?piped\.video',
487 r'(?:www\.)?piped\.aeong\.one',
488 r'(?:www\.)?piped\.moomoo\.me',
489 r'(?:www\.)?piped\.chauvet\.pro',
490 r'(?:www\.)?watch\.leptons\.xyz',
491 r'(?:www\.)?pd\.vern\.cc',
492 r'(?:www\.)?piped\.hostux\.net',
493 r'(?:www\.)?piped\.lunar\.icu',
494 # Hyperpipe instances from https://hyperpipe.codeberg.page/
495 r'(?:www\.)?hyperpipe\.surge\.sh',
496 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
497 r'(?:www\.)?listen\.whatever\.social',
498 r'(?:www\.)?music\.adminforge\.de',
501 # extracted from account/account_menu ep
502 # XXX: These are the supported YouTube UI and API languages,
503 # which is slightly different from languages supported for translation in YouTube studio
504 _SUPPORTED_LANG_CODES = [
505 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
506 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
507 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
508 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
509 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
510 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
513 _IGNORED_WARNINGS = {
514 'Unavailable videos will be hidden during playback',
515 'Unavailable videos are hidden',
518 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
519 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
def ucid_or_none(self, ucid):
    """Search `ucid` for a whole-string channel UC-id; the default result is None."""
    anchored_ucid_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(anchored_ucid_re, ucid, 'UC-id', default=None)
def handle_or_none(self, handle):
    """Search `handle` for a whole-string @-handle; the default result is None."""
    anchored_handle_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(anchored_handle_re, handle, '@-handle', default=None)
def handle_from_url(self, url):
    """Search a youtube.com URL (or a bare path) for a leading @-handle; the default result is None."""
    handle_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_url_re, url, 'channel handle', default=None)
def ucid_from_url(self, url):
    """Search a youtube.com URL (or a bare path) for a leading channel UC-id; the default result is None."""
    ucid_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_url_re, url, 'channel id', default=None)
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.

    Raises ExtractorError when the configured code is not in _SUPPORTED_LANG_CODES.
    """
    configured = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])
    preferred_lang = configured[0]
    if not preferred_lang:
        return None

    if preferred_lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if preferred_lang != 'en':
        self.report_warning(
            f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return preferred_lang
def _initialize_consent(self):
    """Set the SOCS cookie to 'CAI' unless a session or a usable SOCS value already exists."""
    yt_cookies = self._get_cookies('https://www.youtube.com/')
    if yt_cookies.get('__Secure-3PSID'):
        return

    socs_cookie = yt_cookies.get('SOCS')
    # A missing SOCS cookie, or one whose value starts with 'CAA', gets replaced;
    # any other existing value is left untouched
    needs_consent = not socs_cookie or socs_cookie.value.startswith('CAA')
    if needs_consent:
        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)
def _initialize_pref(self):
    """Rewrite the PREF cookie so that 'hl' and 'tz' are forced, keeping its other fields."""
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
def _real_initialize(self):
    """Set up the PREF and consent cookies, then enforce the login requirement."""
    self._initialize_pref()
    self._initialize_consent()
    # Raises via raise_login_required() when _LOGIN_REQUIRED is set and no cookies were passed
    self._check_login_required()
def _check_login_required(self):
    """Call raise_login_required() when _LOGIN_REQUIRED is set and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
583 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
584 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client` (KeyError if unknown)."""
    client_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_config)
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value configured for `client` (KeyError if unknown)."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get() on `ytcfg`, falling back to the default config of `default_client`.

    The fallback is used whenever the first lookup yields a falsy value.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
def _extract_client_name(self, ytcfg, default_client='web'):
    """Look up the client name string in `ytcfg`, with fallback to the default client config."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
    """Look up the client version string in `ytcfg`, with fallback to the default client config."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname.

    Precedence: the 'innertube_host' configuration argument, then
    `req_api_hostname`, then the host configured for `default_client`
    ('web' when that is not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict from `ytcfg`, else from the default client config.

    The 'client' sub-dict, when present, is updated in place with the
    enforced language and timezone fields.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context
619 _SAPISID = None
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the 'SAPISIDHASH <time>_<sha1>' authorization value, or None without a SAPISID.

    The SAPISID is read from the cookies the first time it is needed and kept
    in self._SAPISID afterwards (False records that none was found).
    """
    time_now = round(time.time())

    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        found = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (found and found.value):
            self._SAPISID = False
        else:
            self._SAPISID = found.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    to_hash = f'{time_now} {self._SAPISID} {origin}'
    digest = hashlib.sha1(to_hash.encode()).hexdigest()
    return f'SAPISIDHASH {time_now}_{digest}'
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` (merged with the context) to the youtubei/v1 endpoint `ep`.

    `context` defaults to the one extracted for `default_client`. Extra
    `headers` are applied on top of the generated API headers. Returns
    whatever _download_json() returns.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    api_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    body = json.dumps(payload).encode('utf8')
    innertube_key = self._configuration_arg(
        'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    # Drop falsy query values, e.g. an unset API key
    url_query = filter_dict({
        'key': innertube_key,
        'prettyPrint': 'false',
    }, cndn=lambda _, v: v)

    return self._download_json(
        api_url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=body, headers=request_headers, query=url_query)
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows the ytInitialData assignment."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
669 @staticmethod
670 def _extract_session_index(*data):
672 Index of current account in account list.
673 See: https://github.com/yt-dlp/yt-dlp/pull/519
675 for ytcfg in data:
676 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
677 if session_index is not None:
678 return session_index
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the 'ID_TOKEN' from `ytcfg`, else search `webpage` for it; None when neither yields one."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
691 @staticmethod
692 def _extract_account_syncid(*args):
694 Extract syncId required to download private playlists of secondary channels
695 @params response and/or ytcfg
697 for data in args:
698 # ytcfg includes channel_syncid if on secondary channel
699 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
700 if delegated_sid:
701 return delegated_sid
702 sync_ids = (try_get(
703 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
704 lambda x: x['DATASYNC_ID']), str) or '').split('||')
705 if len(sync_ids) >= 2 and sync_ids[1]:
706 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
707 # and just "user_syncid||" for primary channel. We only want the channel_syncid
708 return sync_ids[0]
710 @staticmethod
711 def _extract_visitor_data(*args):
713 Extracts visitorData from an API response or ytcfg
714 Appears to be used to track session state
716 return get_first(
717 args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
718 expected_type=str)
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated (cached after first access)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to ``ytcfg.set(...)`` in `webpage`; returns {} when unavailable."""
    if not webpage:
        return {}
    # '{}' is parsed when no ytcfg.set(...) call is found
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Assemble the HTTP headers for an Innertube API request.

    Explicit keyword values take precedence over those extracted from
    `ytcfg`. The result is passed through filter_dict() before being returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    headers = {
        'X-YouTube-Client-Name': str(
            self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    auth_user = self._extract_session_index(ytcfg) if session_index is None else session_index
    if auth_user is not None:
        headers['X-Goog-AuthUser'] = auth_user
    elif account_syncid:
        # Only the explicitly passed sync id triggers the fallback index 0
        headers['X-Goog-AuthUser'] = 0

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers.update({'Authorization': authorization, 'X-Origin': origin})
    return filter_dict(headers)
def _download_ytcfg(self, client, video_id):
    """Download the config page of a web-based client and extract its ytcfg.

    Returns {} for clients without a known config page.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    display_name = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {display_name} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
770 @staticmethod
771 def _build_api_continuation_query(continuation, ctp=None):
772 query = {
773 'continuation': continuation,
775 # TODO: Inconsistency with clickTrackingParams.
776 # Currently we have a fixed ctp contained within context (from ytcfg)
777 # and a ctp in root query for continuation.
778 if ctp:
779 query['clickTracking'] = {'clickTrackingParams': ctp}
780 return query
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from next/reload continuation data in `renderer`, or return None."""
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict; None for anything else."""
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for `renderer`.

    Next/reload continuation data is tried first; otherwise the endpoint of a
    continuationItemRenderer under 'contents', 'items' or 'rows' is used.
    """
    item_endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, item_endpoint_path,
        get_all=False, expected_type=cls._extract_continuation_ep_data)
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` pairs from the 'alerts' list of an API response.

    Malformed entries (non-dict alert containers or renderers, alerts without
    a type or without text) are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer value comes straight from the response; a non-dict
            # one would raise AttributeError on .get() below
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report alerts as warnings and raise ExtractorError for the last error alert.

    Alerts of type 'error' count as errors only when `fatal` is set; other
    alerts are warned about unless their message is in _IGNORED_WARNINGS.
    """
    error_alerts, warning_alerts = [], []
    for alert_type, alert_message in alerts:
        entry = [alert_type, alert_message]
        if alert_type.lower() == 'error' and fatal:
            error_alerts.append(entry)
        elif alert_message not in self._IGNORED_WARNINGS:
            warning_alerts.append(entry)

    # Every error but the last is downgraded to a warning
    to_warn = warning_alerts + error_alerts[:-1]
    for alert_type, alert_message in to_warn:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

    if not error_alerts:
        return
    raise ExtractorError(f'YouTube said: {error_alerts[-1][1]}', expected=expected)
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts() with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.
    @returns [{'type': BadgeType}]
    """
    by_icon = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    by_style = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    by_label = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    found = []
    renderers = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for renderer in renderers:
        # The icon type is tried first, then the badge style
        known_type = (
            by_icon.get(traverse_obj(renderer, ('icon', 'iconType'), expected_type=str))
            or by_style.get(traverse_obj(renderer, 'style'))
        )
        if known_type:
            found.append({'type': known_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(
            renderer, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
            get_all=False, expected_type=str, default='')
        lowered = label.lower()
        # The first label_map key contained in the label wins
        guessed_type = next(
            (candidate for needle, candidate in by_label.items() if needle in lowered), None)
        if guessed_type is not None:
            found.append({'type': guessed_type})

    return found
@staticmethod
def _has_badge(badges, badge_type):
    """Tell whether any entry in `badges` has `badge_type` under its 'type' key."""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in `data` at any of `path_list`.

    Each candidate is read either from its 'simpleText' string or by
    joining the 'text' of its 'runs' (a bare list is treated as the runs).
    With no paths given, `data` itself is the only candidate.

    @param max_runs: if truthy, only this many leading runs are joined
    @returns the text, or None if nothing non-empty was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without branching keys gives a single object, not a list of results
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for candidate in candidates:
            simple = try_get(candidate, lambda x: x['simpleText'], str)
            if simple:
                return simple

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str))
            if joined:
                return joined

    return None
def _get_count(self, data, *path_list):
    """Read a count from the text found in `data` at `path_list`.

    `parse_count` is tried first; if it yields None, the leading run of
    digits and commas (after removing all whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    compact = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact, 'count', default=None)
    return str_to_int(leading_digits)
@staticmethod
def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
    """
    Extract thumbnails from thumbnails dict

    Entries without a valid URL are dropped.

    @param path_list: path list to level that contains 'thumbnails' key
    @param final_key: key under each path that holds the thumbnail entries
    @returns list of dicts with 'url', 'height' and 'width'
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), final_key, ...))
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                # Drop the query string
                url = url.split('?')[0]
            collected.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'

    @returns the result of datetime_from_str, or None if the text does not
             match or datetime_from_str raises ValueError
    """

    # XXX: this could be moved to a general function in utils/_utils.py
    # The relative time text strings are roughly the same as what
    # Javascript's Intl.RelativeTimeFormat function generates.
    # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
    found = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
        relative_time_text)
    if not found:
        return None

    keyword = found.group('start')
    if keyword:
        return datetime_from_str(keyword)

    amount, unit = found.group('time', 'unit')
    try:
        return datetime_from_str('now-{}{}'.format(amount, unit))
    except ValueError:
        return None
def _parse_time_text(self, text):
    """Convert a time text into a timestamp.

    The text is first interpreted as a relative time; failing that,
    `unified_timestamp` is tried on the whole text and then on a date-like
    portion extracted from its lowercased form.

    @returns the timestamp, or None if `text` is empty or nothing could be
             parsed (a one-time warning is reported in the latter case when
             the preferred language is None or 'en')
    """
    if not text:
        return None

    timestamp = None
    relative = self.extract_relative_time(text)
    if isinstance(relative, dt.datetime):
        timestamp = calendar.timegm(relative.timetuple())

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    # `text` is known to be non-empty here
    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the API endpoint `ep` and return its response, retrying where appropriate.

    Two retry managers are driven by hand: one for request/alert errors
    and one for incomplete data, which is fatal only when the
    `raise_incomplete_data` extractor argument is set.

    @param check_get_keys: path(s) that must resolve to a truthy value in
                           the response for it to count as complete
    @param fatal: passed to `_error_or_warning` for non-retryable errors
    @returns the response; the result of `_error_or_warning` for errors
             that are not retried; or None once the incomplete-data retry
             iterator stops yielding a truthy value
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    incomplete_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    incomplete_rm = next(incomplete_retries)
    request_retries = iter(self.RetryManager())
    request_rm = next(request_retries)
    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            if not isinstance(err.cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(err.cause, HTTPError):
                request_rm.error = err
                next(request_retries)
                continue

            # Peek at the error body; a non-HTML body may carry a JSON error message
            first_bytes = err.cause.response.read(512)
            if not is_html(first_bytes):
                body = self._webpage_read_content(
                    err.cause.response, None, item_id, prefix=first_bytes) or '{}'
                yt_error = try_get(
                    self._parse_json(body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if err.cause.status not in (403, 429):
                request_rm.error = err
                next(request_retries)
                continue
            return self._error_or_warning(err, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in err.msg.lower():
                request_rm.error = err
                next(request_retries)
                continue
            return self._error_or_warning(err, fatal=fatal)

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            incomplete_rm.error = ExtractorError('Incomplete data received', expected=True)
            should_retry = next(incomplete_retries, None)
            if not should_retry:
                return None
            continue

        return response
@staticmethod
def is_music_url(url):
    """Tell whether `url` starts with the music.youtube.com host (scheme optional) followed by '/'."""
    return bool(re.match(r'(https?://)?music\.youtube\.com/', url))
def _extract_video(self, renderer):
    """Build a 'url'-type result dict for YoutubeIE from a video renderer.

    Fields are read from `renderer` itself and, as a fallback for some of
    them, from the reel player header renderer nested under its
    navigation endpoint.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = self.ucid_or_none(
        traverse_obj(
            renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
            expected_type=str, get_all=False)
        or traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId')))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # The count is stored as concurrent viewers for live and upcoming videos
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    # The relative time text is parsed only when the approximate_date extractor argument is set
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)
    else:
        timestamp = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None,
    }
1164 class YoutubeIE(YoutubeBaseInfoExtractor):
1165 IE_DESC = 'YouTube'
1166 _VALID_URL = r'''(?x)^
1168 (?:https?://|//) # http(s):// or protocol-independent URL
1169 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1170 (?:www\.)?deturl\.com/www\.youtube\.com|
1171 (?:www\.)?pwnyoutube\.com|
1172 (?:www\.)?hooktube\.com|
1173 (?:www\.)?yourepeat\.com|
1174 tube\.majestyc\.net|
1175 {invidious}|
1176 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1177 (?:.*?\#/)? # handle anchor (#/) redirect urls
1178 (?: # the various things that can precede the ID:
1179 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1180 |(?: # or the v= param in all its forms
1181 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1182 (?:\?|\#!?) # the params delimiter ? or # or #!
1183 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1187 |(?:
1188 youtu\.be| # just youtu.be/xxxx
1189 vid\.plus| # or vid.plus/xxxx
1190 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1191 {invidious}
1193 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1195 )? # all until now is optional -> you can pass the naked ID
1196 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
1197 (?(1).+)? # if we found the ID, everything can follow
1198 (?:\#|$)'''.format(
1199 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1201 _EMBED_REGEX = [
1202 r'''(?x)
1204 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1205 data-video-url=|
1206 <embed[^>]+?src=|
1207 embedSWF\(?:\s*|
1208 <object[^>]+data=|
1209 new\s+SWFObject\(
1211 (["\'])
1212 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1213 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1214 \1''',
1215 # https://wordpress.org/plugins/lazy-load-for-videos/
1216 r'''(?xs)
1217 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1218 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1220 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1222 _PLAYER_INFO_RE = (
1223 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1224 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1225 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1227 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
1228 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1229 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1230 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1231 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1232 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1233 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1234 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1235 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1236 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1237 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1238 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1239 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1240 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1241 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1242 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1243 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1244 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1245 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1248 # 3D videos
1249 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1250 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1251 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1252 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1253 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1254 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1255 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1257 # Apple HTTP Live Streaming
1258 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1259 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1260 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1261 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1262 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1263 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1264 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1265 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1267 # DASH mp4 video
1268 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1269 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1270 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1271 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1272 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1273 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1274 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1275 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1276 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1277 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1278 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1279 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1281 # Dash mp4 audio
1282 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1283 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1284 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1285 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1286 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1287 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1288 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1290 # Dash webm
1291 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1292 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1293 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1294 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1295 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1296 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1297 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1298 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1299 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1300 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1301 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1302 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1303 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1304 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1305 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1306 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1307 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1308 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1309 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1310 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1311 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1312 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1314 # Dash webm audio
1315 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1316 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1318 # Dash webm audio with opus inside
1319 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1320 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1321 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1323 # RTMP (unnamed)
1324 '_rtmp': {'protocol': 'rtmp'},
1326 # av01 video only formats sometimes served with "unknown" codecs
1327 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1328 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1329 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1330 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1331 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1332 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1333 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1334 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1336 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1337 _POTOKEN_EXPERIMENTS = ('51217476', '51217102')
1338 _BROKEN_CLIENTS = {
1339 short_client_name(client): client
1340 for client in ('android', 'android_creator', 'android_music')
1343 _GEO_BYPASS = False
1345 IE_NAME = 'youtube'
1346 _TESTS = [
1348 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1349 'info_dict': {
1350 'id': 'BaW_jenozKc',
1351 'ext': 'mp4',
1352 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1353 'channel': 'Philipp Hagemeister',
1354 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1355 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1356 'upload_date': '20121002',
1357 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1358 'categories': ['Science & Technology'],
1359 'tags': ['youtube-dl'],
1360 'duration': 10,
1361 'view_count': int,
1362 'like_count': int,
1363 'availability': 'public',
1364 'playable_in_embed': True,
1365 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1366 'live_status': 'not_live',
1367 'age_limit': 0,
1368 'start_time': 1,
1369 'end_time': 9,
1370 'comment_count': int,
1371 'channel_follower_count': int,
1372 'uploader': 'Philipp Hagemeister',
1373 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1374 'uploader_id': '@PhilippHagemeister',
1375 'heatmap': 'count:100',
1376 'timestamp': 1349198244,
1380 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1381 'note': 'Embed-only video (#1746)',
1382 'info_dict': {
1383 'id': 'yZIXLfi8CZQ',
1384 'ext': 'mp4',
1385 'upload_date': '20120608',
1386 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1387 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1388 'age_limit': 18,
1390 'skip': 'Private video',
1393 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1394 'note': 'Use the first video ID in the URL',
1395 'info_dict': {
1396 'id': 'BaW_jenozKc',
1397 'ext': 'mp4',
1398 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1399 'channel': 'Philipp Hagemeister',
1400 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1401 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1402 'upload_date': '20121002',
1403 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1404 'categories': ['Science & Technology'],
1405 'tags': ['youtube-dl'],
1406 'duration': 10,
1407 'view_count': int,
1408 'like_count': int,
1409 'availability': 'public',
1410 'playable_in_embed': True,
1411 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1412 'live_status': 'not_live',
1413 'age_limit': 0,
1414 'comment_count': int,
1415 'channel_follower_count': int,
1416 'uploader': 'Philipp Hagemeister',
1417 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1418 'uploader_id': '@PhilippHagemeister',
1419 'heatmap': 'count:100',
1420 'timestamp': 1349198244,
1422 'params': {
1423 'skip_download': True,
1427 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1428 'note': '256k DASH audio (format 141) via DASH manifest',
1429 'info_dict': {
1430 'id': 'a9LDPn-MO4I',
1431 'ext': 'm4a',
1432 'upload_date': '20121002',
1433 'description': '',
1434 'title': 'UHDTV TEST 8K VIDEO.mp4',
1436 'params': {
1437 'youtube_include_dash_manifest': True,
1438 'format': '141',
1440 'skip': 'format 141 not served anymore',
1442 # DASH manifest with encrypted signature
1444 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1445 'info_dict': {
1446 'id': 'IB3lcPjvWLA',
1447 'ext': 'm4a',
1448 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1449 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1450 'duration': 244,
1451 'upload_date': '20131011',
1452 'abr': 129.495,
1453 'like_count': int,
1454 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1455 'playable_in_embed': True,
1456 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1457 'view_count': int,
1458 'track': 'The Spark',
1459 'live_status': 'not_live',
1460 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1461 'channel': 'Afrojack',
1462 'tags': 'count:19',
1463 'availability': 'public',
1464 'categories': ['Music'],
1465 'age_limit': 0,
1466 'alt_title': 'The Spark',
1467 'channel_follower_count': int,
1468 'uploader': 'Afrojack',
1469 'uploader_url': 'https://www.youtube.com/@Afrojack',
1470 'uploader_id': '@Afrojack',
1472 'params': {
1473 'youtube_include_dash_manifest': True,
1474 'format': '141/bestaudio[ext=m4a]',
1477 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1479 'note': 'Embed allowed age-gate video',
1480 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1481 'info_dict': {
1482 'id': 'HtVdAasjOgU',
1483 'ext': 'mp4',
1484 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1485 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1486 'duration': 142,
1487 'upload_date': '20140605',
1488 'age_limit': 18,
1489 'categories': ['Gaming'],
1490 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1491 'availability': 'needs_auth',
1492 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1493 'like_count': int,
1494 'channel': 'The Witcher',
1495 'live_status': 'not_live',
1496 'tags': 'count:17',
1497 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1498 'playable_in_embed': True,
1499 'view_count': int,
1500 'channel_follower_count': int,
1501 'uploader': 'The Witcher',
1502 'uploader_url': 'https://www.youtube.com/@thewitcher',
1503 'uploader_id': '@thewitcher',
1504 'comment_count': int,
1505 'channel_is_verified': True,
1506 'heatmap': 'count:100',
1507 'timestamp': 1401991663,
1511 'note': 'Age-gate video with embed allowed in public site',
1512 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1513 'info_dict': {
1514 'id': 'HsUATh_Nc2U',
1515 'ext': 'mp4',
1516 'title': 'Godzilla 2 (Official Video)',
1517 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1518 'upload_date': '20200408',
1519 'age_limit': 18,
1520 'availability': 'needs_auth',
1521 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1522 'channel': 'FlyingKitty',
1523 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1524 'view_count': int,
1525 'categories': ['Entertainment'],
1526 'live_status': 'not_live',
1527 'tags': ['Flyingkitty', 'godzilla 2'],
1528 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1529 'like_count': int,
1530 'duration': 177,
1531 'playable_in_embed': True,
1532 'channel_follower_count': int,
1533 'uploader': 'FlyingKitty',
1534 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1535 'uploader_id': '@FlyingKitty900',
1536 'comment_count': int,
1537 'channel_is_verified': True,
1541 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1542 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1543 'info_dict': {
1544 'id': 'Tq92D6wQ1mg',
1545 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1546 'ext': 'mp4',
1547 'upload_date': '20191228',
1548 'description': 'md5:17eccca93a786d51bc67646756894066',
1549 'age_limit': 18,
1550 'like_count': int,
1551 'availability': 'needs_auth',
1552 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1553 'view_count': int,
1554 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1555 'channel': 'Projekt Melody',
1556 'live_status': 'not_live',
1557 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1558 'playable_in_embed': True,
1559 'categories': ['Entertainment'],
1560 'duration': 106,
1561 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1562 'comment_count': int,
1563 'channel_follower_count': int,
1564 'uploader': 'Projekt Melody',
1565 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1566 'uploader_id': '@ProjektMelody',
1567 'timestamp': 1577508724,
1571 'note': 'Non-Agegated non-embeddable video',
1572 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1573 'info_dict': {
1574 'id': 'MeJVWBSsPAY',
1575 'ext': 'mp4',
1576 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1577 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1578 'upload_date': '20130730',
1579 'track': 'Such mich find mich',
1580 'age_limit': 0,
1581 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1582 'like_count': int,
1583 'playable_in_embed': False,
1584 'creator': 'OOMPH!',
1585 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1586 'view_count': int,
1587 'alt_title': 'Such mich find mich',
1588 'duration': 210,
1589 'channel': 'Herr Lurik',
1590 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1591 'categories': ['Music'],
1592 'availability': 'public',
1593 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1594 'live_status': 'not_live',
1595 'artist': 'OOMPH!',
1596 'channel_follower_count': int,
1597 'uploader': 'Herr Lurik',
1598 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1599 'uploader_id': '@HerrLurik',
1603 'note': 'Non-bypassable age-gated video',
1604 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1605 'only_matching': True,
1607 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1608 # YouTube Red ad is not captured for creator
1610 'url': '__2ABJjxzNo',
1611 'info_dict': {
1612 'id': '__2ABJjxzNo',
1613 'ext': 'mp4',
1614 'duration': 266,
1615 'upload_date': '20100430',
1616 'creator': 'deadmau5',
1617 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1618 'title': 'Deadmau5 - Some Chords (HD)',
1619 'alt_title': 'Some Chords',
1620 'availability': 'public',
1621 'tags': 'count:14',
1622 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1623 'view_count': int,
1624 'live_status': 'not_live',
1625 'channel': 'deadmau5',
1626 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1627 'like_count': int,
1628 'track': 'Some Chords',
1629 'artist': 'deadmau5',
1630 'playable_in_embed': True,
1631 'age_limit': 0,
1632 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1633 'categories': ['Music'],
1634 'album': 'Some Chords',
1635 'channel_follower_count': int,
1636 'uploader': 'deadmau5',
1637 'uploader_url': 'https://www.youtube.com/@deadmau5',
1638 'uploader_id': '@deadmau5',
1640 'expected_warnings': [
1641 'DASH manifest missing',
1644 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1646 'url': 'lqQg6PlCWgI',
1647 'info_dict': {
1648 'id': 'lqQg6PlCWgI',
1649 'ext': 'mp4',
1650 'duration': 6085,
1651 'upload_date': '20150827',
1652 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1653 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1654 'like_count': int,
1655 'release_timestamp': 1343767800,
1656 'playable_in_embed': True,
1657 'categories': ['Sports'],
1658 'release_date': '20120731',
1659 'channel': 'Olympics',
1660 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1661 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1662 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1663 'age_limit': 0,
1664 'availability': 'public',
1665 'live_status': 'was_live',
1666 'view_count': int,
1667 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1668 'channel_follower_count': int,
1669 'uploader': 'Olympics',
1670 'uploader_url': 'https://www.youtube.com/@Olympics',
1671 'uploader_id': '@Olympics',
1672 'channel_is_verified': True,
1673 'timestamp': 1440707674,
1675 'params': {
1676 'skip_download': 'requires avconv',
1679 # Non-square pixels
1681 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1682 'info_dict': {
1683 'id': '_b-2C3KPAM0',
1684 'ext': 'mp4',
1685 'stretched_ratio': 16 / 9.,
1686 'duration': 85,
1687 'upload_date': '20110310',
1688 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1689 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1690 'playable_in_embed': True,
1691 'channel': '孫ᄋᄅ',
1692 'age_limit': 0,
1693 'tags': 'count:11',
1694 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1695 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1696 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1697 'view_count': int,
1698 'categories': ['People & Blogs'],
1699 'like_count': int,
1700 'live_status': 'not_live',
1701 'availability': 'unlisted',
1702 'comment_count': int,
1703 'channel_follower_count': int,
1704 'uploader': '孫ᄋᄅ',
1705 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1706 'uploader_id': '@AllenMeow',
1707 'timestamp': 1299776999,
1710 # url_encoded_fmt_stream_map is empty string
1712 'url': 'qEJwOuvDf7I',
1713 'info_dict': {
1714 'id': 'qEJwOuvDf7I',
1715 'ext': 'webm',
1716 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1717 'description': '',
1718 'upload_date': '20150404',
1720 'params': {
1721 'skip_download': 'requires avconv',
1723 'skip': 'This live event has ended.',
1725 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1727 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1728 'info_dict': {
1729 'id': 'FIl7x6_3R5Y',
1730 'ext': 'webm',
1731 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1732 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1733 'duration': 220,
1734 'upload_date': '20150625',
1735 'formats': 'mincount:31',
1737 'skip': 'not actual anymore',
1739 # DASH manifest with segment_list
1741 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1742 'md5': '8ce563a1d667b599d21064e982ab9e31',
1743 'info_dict': {
1744 'id': 'CsmdDsKjzN8',
1745 'ext': 'mp4',
1746 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1747 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1748 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1750 'params': {
1751 'youtube_include_dash_manifest': True,
1752 'format': '135', # bestvideo
1754 'skip': 'This live event has ended.',
1757 # Multifeed videos (multiple cameras), URL can be of any Camera
1758 # TODO: fix multifeed titles
1759 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1760 'info_dict': {
1761 'id': 'zaPI8MvL8pg',
1762 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1763 'description': 'md5:563ccbc698b39298481ca3c571169519',
1765 'playlist': [{
1766 'info_dict': {
1767 'id': 'j5yGuxZ8lLU',
1768 'ext': 'mp4',
1769 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1770 'description': 'md5:563ccbc698b39298481ca3c571169519',
1771 'duration': 10120,
1772 'channel_follower_count': int,
1773 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1774 'availability': 'public',
1775 'playable_in_embed': True,
1776 'upload_date': '20131105',
1777 'categories': ['Gaming'],
1778 'live_status': 'was_live',
1779 'tags': 'count:24',
1780 'release_timestamp': 1383701910,
1781 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1782 'comment_count': int,
1783 'age_limit': 0,
1784 'like_count': int,
1785 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1786 'channel': 'WiiLikeToPlay',
1787 'view_count': int,
1788 'release_date': '20131106',
1789 'uploader': 'WiiLikeToPlay',
1790 'uploader_id': '@WLTP',
1791 'uploader_url': 'https://www.youtube.com/@WLTP',
1793 }, {
1794 'info_dict': {
1795 'id': 'zaPI8MvL8pg',
1796 'ext': 'mp4',
1797 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1798 'availability': 'public',
1799 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1800 'channel': 'WiiLikeToPlay',
1801 'channel_follower_count': int,
1802 'description': 'md5:563ccbc698b39298481ca3c571169519',
1803 'duration': 10108,
1804 'age_limit': 0,
1805 'like_count': int,
1806 'tags': 'count:24',
1807 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1808 'release_timestamp': 1383701915,
1809 'comment_count': int,
1810 'upload_date': '20131105',
1811 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1812 'release_date': '20131106',
1813 'playable_in_embed': True,
1814 'live_status': 'was_live',
1815 'categories': ['Gaming'],
1816 'view_count': int,
1817 'uploader': 'WiiLikeToPlay',
1818 'uploader_id': '@WLTP',
1819 'uploader_url': 'https://www.youtube.com/@WLTP',
1821 }, {
1822 'info_dict': {
1823 'id': 'R7r3vfO7Hao',
1824 'ext': 'mp4',
1825 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1826 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1827 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1828 'like_count': int,
1829 'availability': 'public',
1830 'playable_in_embed': True,
1831 'upload_date': '20131105',
1832 'description': 'md5:563ccbc698b39298481ca3c571169519',
1833 'channel_follower_count': int,
1834 'tags': 'count:24',
1835 'release_date': '20131106',
1836 'comment_count': int,
1837 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1838 'channel': 'WiiLikeToPlay',
1839 'categories': ['Gaming'],
1840 'release_timestamp': 1383701914,
1841 'live_status': 'was_live',
1842 'age_limit': 0,
1843 'duration': 10128,
1844 'view_count': int,
1845 'uploader': 'WiiLikeToPlay',
1846 'uploader_id': '@WLTP',
1847 'uploader_url': 'https://www.youtube.com/@WLTP',
1850 'params': {'skip_download': True},
1851 'skip': 'Not multifeed anymore',
1854 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1855 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1856 'info_dict': {
1857 'id': 'gVfLd0zydlo',
1858 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1860 'playlist_count': 2,
1861 'skip': 'Not multifeed anymore',
1864 'url': 'https://vid.plus/FlRa-iH7PGw',
1865 'only_matching': True,
1868 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1869 'only_matching': True,
1872 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1873 # Also tests cut-off URL expansion in video description (see
1874 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1875 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1876 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1877 'info_dict': {
1878 'id': 'lsguqyKfVQg',
1879 'ext': 'mp4',
1880 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1881 'alt_title': 'Dark Walk',
1882 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1883 'duration': 133,
1884 'upload_date': '20151119',
1885 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1886 'track': 'Dark Walk',
1887 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1888 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1889 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1890 'categories': ['Film & Animation'],
1891 'view_count': int,
1892 'live_status': 'not_live',
1893 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1894 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1895 'tags': 'count:13',
1896 'availability': 'public',
1897 'channel': 'IronSoulElf',
1898 'playable_in_embed': True,
1899 'like_count': int,
1900 'age_limit': 0,
1901 'channel_follower_count': int,
1903 'params': {
1904 'skip_download': True,
1908 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1909 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1910 'only_matching': True,
1913 # Video with yt:stretch=17:0
1914 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1915 'info_dict': {
1916 'id': 'Q39EVAstoRM',
1917 'ext': 'mp4',
1918 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1919 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1920 'upload_date': '20151107',
1922 'params': {
1923 'skip_download': True,
1925 'skip': 'This video does not exist.',
1928 # Video with incomplete 'yt:stretch=16:'
1929 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1930 'only_matching': True,
1933 # Video licensed under Creative Commons
1934 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1935 'info_dict': {
1936 'id': 'M4gD1WSo5mA',
1937 'ext': 'mp4',
1938 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1939 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1940 'duration': 721,
1941 'upload_date': '20150128',
1942 'license': 'Creative Commons Attribution license (reuse allowed)',
1943 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1944 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1945 'like_count': int,
1946 'age_limit': 0,
1947 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1948 'channel': 'The Berkman Klein Center for Internet & Society',
1949 'availability': 'public',
1950 'view_count': int,
1951 'categories': ['Education'],
1952 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1953 'live_status': 'not_live',
1954 'playable_in_embed': True,
1955 'channel_follower_count': int,
1956 'chapters': list,
1957 'uploader': 'The Berkman Klein Center for Internet & Society',
1958 'uploader_id': '@BKCHarvard',
1959 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
1960 'timestamp': 1422422076,
1962 'params': {
1963 'skip_download': True,
1967 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1968 'info_dict': {
1969 'id': 'eQcmzGIKrzg',
1970 'ext': 'mp4',
1971 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1972 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1973 'duration': 4060,
1974 'upload_date': '20151120',
1975 'license': 'Creative Commons Attribution license (reuse allowed)',
1976 'playable_in_embed': True,
1977 'tags': 'count:12',
1978 'like_count': int,
1979 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1980 'age_limit': 0,
1981 'availability': 'public',
1982 'categories': ['News & Politics'],
1983 'channel': 'Bernie Sanders',
1984 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1985 'view_count': int,
1986 'live_status': 'not_live',
1987 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1988 'comment_count': int,
1989 'channel_follower_count': int,
1990 'chapters': list,
1991 'uploader': 'Bernie Sanders',
1992 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1993 'uploader_id': '@BernieSanders',
1994 'channel_is_verified': True,
1995 'heatmap': 'count:100',
1996 'timestamp': 1447987198,
1998 'params': {
1999 'skip_download': True,
2003 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
2004 'only_matching': True,
2007 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
2008 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
2009 'only_matching': True,
2012 # Rental video preview
2013 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
2014 'info_dict': {
2015 'id': 'uGpuVWrhIzE',
2016 'ext': 'mp4',
2017 'title': 'Piku - Trailer',
2018 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
2019 'upload_date': '20150811',
2020 'license': 'Standard YouTube License',
2022 'params': {
2023 'skip_download': True,
2025 'skip': 'This video is not available.',
2028 # YouTube Red video with episode data
2029 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
2030 'info_dict': {
2031 'id': 'iqKdEhx-dD4',
2032 'ext': 'mp4',
2033 'title': 'Isolation - Mind Field (Ep 1)',
2034 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
2035 'duration': 2085,
2036 'upload_date': '20170118',
2037 'series': 'Mind Field',
2038 'season_number': 1,
2039 'episode_number': 1,
2040 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
2041 'tags': 'count:12',
2042 'view_count': int,
2043 'availability': 'public',
2044 'age_limit': 0,
2045 'channel': 'Vsauce',
2046 'episode': 'Episode 1',
2047 'categories': ['Entertainment'],
2048 'season': 'Season 1',
2049 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2050 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2051 'like_count': int,
2052 'playable_in_embed': True,
2053 'live_status': 'not_live',
2054 'channel_follower_count': int,
2055 'uploader': 'Vsauce',
2056 'uploader_url': 'https://www.youtube.com/@Vsauce',
2057 'uploader_id': '@Vsauce',
2058 'comment_count': int,
2059 'channel_is_verified': True,
2060 'timestamp': 1484761047,
2062 'params': {
2063 'skip_download': True,
2065 'expected_warnings': [
2066 'Skipping DASH manifest',
2070 # The following content has been identified by the YouTube community
2071 # as inappropriate or offensive to some audiences.
2072 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2073 'info_dict': {
2074 'id': '6SJNVb0GnPI',
2075 'ext': 'mp4',
2076 'title': 'Race Differences in Intelligence',
2077 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2078 'duration': 965,
2079 'upload_date': '20140124',
2081 'params': {
2082 'skip_download': True,
2084 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2087 # itag 212
2088 'url': '1t24XAntNCY',
2089 'only_matching': True,
2092 # geo restricted to JP
2093 'url': 'sJL6WA-aGkQ',
2094 'only_matching': True,
2097 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2098 'only_matching': True,
2101 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2102 'only_matching': True,
2105 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2106 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2107 'only_matching': True,
2110 # DRM protected
2111 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2112 'only_matching': True,
2115 # Video with unsupported adaptive stream type formats
2116 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2117 'info_dict': {
2118 'id': 'Z4Vy8R84T1U',
2119 'ext': 'mp4',
2120 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2121 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2122 'duration': 433,
2123 'upload_date': '20130923',
2124 'formats': 'maxcount:10',
2126 'params': {
2127 'skip_download': True,
2128 'youtube_include_dash_manifest': False,
2130 'skip': 'not actual anymore',
2133 # Youtube Music Auto-generated description
2134 # TODO: fix metadata extraction
2135 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2136 'info_dict': {
2137 'id': 'MgNrAu2pzNs',
2138 'ext': 'mp4',
2139 'title': 'Voyeur Girl',
2140 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2141 'upload_date': '20190312',
2142 'artists': ['Stephen'],
2143 'creators': ['Stephen'],
2144 'track': 'Voyeur Girl',
2145 'album': 'it\'s too much love to know my dear',
2146 'release_date': '20190313',
2147 'alt_title': 'Voyeur Girl',
2148 'view_count': int,
2149 'playable_in_embed': True,
2150 'like_count': int,
2151 'categories': ['Music'],
2152 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2153 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2154 'uploader': 'Stephen',
2155 'availability': 'public',
2156 'duration': 169,
2157 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2158 'age_limit': 0,
2159 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2160 'tags': 'count:11',
2161 'live_status': 'not_live',
2162 'channel_follower_count': int,
2164 'params': {
2165 'skip_download': True,
2169 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2170 'only_matching': True,
2173 # invalid -> valid video id redirection
2174 'url': 'DJztXj2GPfl',
2175 'info_dict': {
2176 'id': 'DJztXj2GPfk',
2177 'ext': 'mp4',
2178 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2179 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2180 'upload_date': '20090125',
2181 'artist': 'Panjabi MC',
2182 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2183 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2185 'params': {
2186 'skip_download': True,
2188 'skip': 'Video unavailable',
2191 # empty description results in an empty string
2192 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2193 'info_dict': {
2194 'id': 'x41yOUIvK2k',
2195 'ext': 'mp4',
2196 'title': 'IMG 3456',
2197 'description': '',
2198 'upload_date': '20170613',
2199 'view_count': int,
2200 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2201 'like_count': int,
2202 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2203 'tags': [],
2204 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2205 'availability': 'public',
2206 'age_limit': 0,
2207 'categories': ['Pets & Animals'],
2208 'duration': 7,
2209 'playable_in_embed': True,
2210 'live_status': 'not_live',
2211 'channel': 'l\'Or Vert asbl',
2212 'channel_follower_count': int,
2213 'uploader': 'l\'Or Vert asbl',
2214 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2215 'uploader_id': '@ElevageOrVert',
2216 'timestamp': 1497343210,
2218 'params': {
2219 'skip_download': True,
2223 # with '};' inside yt initial data (see [1])
2224 # see [2] for an example with '};' inside ytInitialPlayerResponse
2225 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2226 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2227 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2228 'info_dict': {
2229 'id': 'CHqg6qOn4no',
2230 'ext': 'mp4',
2231 'title': 'Part 77 Sort a list of simple types in c#',
2232 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2233 'upload_date': '20130831',
2234 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2235 'like_count': int,
2236 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2237 'live_status': 'not_live',
2238 'categories': ['Education'],
2239 'availability': 'public',
2240 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2241 'tags': 'count:12',
2242 'playable_in_embed': True,
2243 'age_limit': 0,
2244 'view_count': int,
2245 'duration': 522,
2246 'channel': 'kudvenkat',
2247 'comment_count': int,
2248 'channel_follower_count': int,
2249 'chapters': list,
2250 'uploader': 'kudvenkat',
2251 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2252 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2253 'channel_is_verified': True,
2254 'heatmap': 'count:100',
2255 'timestamp': 1377976349,
2257 'params': {
2258 'skip_download': True,
2262 # another example of '};' in ytInitialData
2263 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2264 'only_matching': True,
2267 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2268 'only_matching': True,
2271 # https://github.com/ytdl-org/youtube-dl/pull/28094
2272 'url': 'OtqTfy26tG0',
2273 'info_dict': {
2274 'id': 'OtqTfy26tG0',
2275 'ext': 'mp4',
2276 'title': 'Burn Out',
2277 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2278 'upload_date': '20141120',
2279 'artist': 'The Cinematic Orchestra',
2280 'track': 'Burn Out',
2281 'album': 'Every Day',
2282 'like_count': int,
2283 'live_status': 'not_live',
2284 'alt_title': 'Burn Out',
2285 'duration': 614,
2286 'age_limit': 0,
2287 'view_count': int,
2288 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2289 'creator': 'The Cinematic Orchestra',
2290 'channel': 'The Cinematic Orchestra',
2291 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2292 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2293 'availability': 'public',
2294 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2295 'categories': ['Music'],
2296 'playable_in_embed': True,
2297 'channel_follower_count': int,
2298 'uploader': 'The Cinematic Orchestra',
2299 'comment_count': int,
2301 'params': {
2302 'skip_download': True,
2306 # controversial video, only works with bpctr when authenticated with cookies
2307 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2308 'only_matching': True,
2311 # controversial video, requires bpctr/contentCheckOk
2312 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2313 'info_dict': {
2314 'id': 'SZJvDhaSDnc',
2315 'ext': 'mp4',
2316 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2317 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2318 'upload_date': '20140716',
2319 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2320 'duration': 170,
2321 'categories': ['News & Politics'],
2322 'view_count': int,
2323 'channel': 'CBS Mornings',
2324 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2325 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2326 'age_limit': 18,
2327 'availability': 'needs_auth',
2328 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2329 'like_count': int,
2330 'live_status': 'not_live',
2331 'playable_in_embed': True,
2332 'channel_follower_count': int,
2333 'uploader': 'CBS Mornings',
2334 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2335 'uploader_id': '@CBSMornings',
2336 'comment_count': int,
2337 'channel_is_verified': True,
2338 'timestamp': 1405513526,
2342 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2343 'url': 'cBvYw8_A0vQ',
2344 'info_dict': {
2345 'id': 'cBvYw8_A0vQ',
2346 'ext': 'mp4',
2347 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2348 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2349 'upload_date': '20201120',
2350 'duration': 1456,
2351 'categories': ['Travel & Events'],
2352 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2353 'view_count': int,
2354 'channel': 'Walk around Japan',
2355 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2356 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
2357 'age_limit': 0,
2358 'availability': 'public',
2359 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2360 'live_status': 'not_live',
2361 'playable_in_embed': True,
2362 'channel_follower_count': int,
2363 'uploader': 'Walk around Japan',
2364 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2365 'uploader_id': '@walkaroundjapan7124',
2366 'timestamp': 1605884416,
2368 'params': {
2369 'skip_download': True,
2371 }, {
2372 # Has multiple audio streams
2373 'url': 'WaOKSUlf4TM',
2374 'only_matching': True,
2375 }, {
2376 # Requires Premium: has format 141 when requested using YTM url
2377 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2378 'only_matching': True,
2379 }, {
2380 # multiple subtitles with same lang_code
2381 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2382 'only_matching': True,
2383 }, {
2384 # Force use android client fallback
2385 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2386 'info_dict': {
2387 'id': 'YOelRv7fMxY',
2388 'title': 'DIGGING A SECRET TUNNEL Part 1',
2389 'ext': '3gp',
2390 'upload_date': '20210624',
2391 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2392 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2393 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2394 'duration': 596,
2395 'categories': ['Entertainment'],
2396 'view_count': int,
2397 'channel': 'colinfurze',
2398 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2399 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2400 'age_limit': 0,
2401 'availability': 'public',
2402 'like_count': int,
2403 'live_status': 'not_live',
2404 'playable_in_embed': True,
2405 'channel_follower_count': int,
2406 'chapters': list,
2407 'uploader': 'colinfurze',
2408 'uploader_url': 'https://www.youtube.com/@colinfurze',
2409 'uploader_id': '@colinfurze',
2410 'comment_count': int,
2411 'channel_is_verified': True,
2412 'heatmap': 'count:100',
2414 'params': {
2415 'format': '17', # 3gp format available on android
2416 'extractor_args': {'youtube': {'player_client': ['android']}},
2418 'skip': 'android client broken',
2421 # Skip download of additional client configs (remix client config in this case)
2422 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2423 'only_matching': True,
2424 'params': {
2425 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2427 }, {
2428 # shorts
2429 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2430 'only_matching': True,
2431 }, {
2432 'note': 'Storyboards',
2433 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2434 'info_dict': {
2435 'id': '5KLPxDtMqe8',
2436 'ext': 'mhtml',
2437 'format_id': 'sb0',
2438 'title': 'Your Brain is Plastic',
2439 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2440 'upload_date': '20140324',
2441 'like_count': int,
2442 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2443 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2444 'view_count': int,
2445 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2446 'playable_in_embed': True,
2447 'tags': 'count:12',
2448 'availability': 'public',
2449 'channel': 'SciShow',
2450 'live_status': 'not_live',
2451 'duration': 248,
2452 'categories': ['Education'],
2453 'age_limit': 0,
2454 'channel_follower_count': int,
2455 'chapters': list,
2456 'uploader': 'SciShow',
2457 'uploader_url': 'https://www.youtube.com/@SciShow',
2458 'uploader_id': '@SciShow',
2459 'comment_count': int,
2460 'channel_is_verified': True,
2461 'heatmap': 'count:100',
2462 'timestamp': 1395685455,
2463 }, 'params': {'format': 'mhtml', 'skip_download': True},
2464 }, {
2465 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2466 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2467 'info_dict': {
2468 'id': '2NUZ8W2llS4',
2469 'ext': 'mp4',
2470 'title': 'The NP that test your phone performance 🙂',
2471 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2472 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2473 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2474 'duration': 21,
2475 'view_count': int,
2476 'age_limit': 0,
2477 'categories': ['Gaming'],
2478 'tags': 'count:23',
2479 'playable_in_embed': True,
2480 'live_status': 'not_live',
2481 'upload_date': '20220103',
2482 'like_count': int,
2483 'availability': 'public',
2484 'channel': 'Leon Nguyen',
2485 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2486 'comment_count': int,
2487 'channel_follower_count': int,
2488 'uploader': 'Leon Nguyen',
2489 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2490 'uploader_id': '@LeonNguyen',
2491 'heatmap': 'count:100',
2492 'timestamp': 1641170939,
2494 }, {
2495 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2496 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2497 'info_dict': {
2498 'id': 'mzZzzBU6lrM',
2499 'ext': 'mp4',
2500 'title': 'I Met GeorgeNotFound In Real Life...',
2501 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2502 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2503 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2504 'duration': 955,
2505 'view_count': int,
2506 'age_limit': 0,
2507 'categories': ['Entertainment'],
2508 'tags': 'count:26',
2509 'playable_in_embed': True,
2510 'live_status': 'not_live',
2511 'release_timestamp': 1641172509,
2512 'release_date': '20220103',
2513 'upload_date': '20220103',
2514 'like_count': int,
2515 'availability': 'public',
2516 'channel': 'Quackity',
2517 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2518 'channel_follower_count': int,
2519 'uploader': 'Quackity',
2520 'uploader_id': '@Quackity',
2521 'uploader_url': 'https://www.youtube.com/@Quackity',
2522 'comment_count': int,
2523 'channel_is_verified': True,
2524 'heatmap': 'count:100',
2525 'timestamp': 1641172509,
2528 { # continuous livestream.
2529 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2530 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
2531 'info_dict': {
2532 'id': 'jfKfPfyJRdk',
2533 'ext': 'mp4',
2534 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2535 'like_count': int,
2536 'uploader': 'Lofi Girl',
2537 'categories': ['Music'],
2538 'concurrent_view_count': int,
2539 'playable_in_embed': True,
2540 'timestamp': 1657627949,
2541 'release_date': '20220712',
2542 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2543 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
2544 'age_limit': 0,
2545 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2546 'release_timestamp': 1657641570,
2547 'uploader_url': 'https://www.youtube.com/@LofiGirl',
2548 'channel_follower_count': int,
2549 'channel_is_verified': True,
2550 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
2551 'view_count': int,
2552 'live_status': 'is_live',
2553 'tags': 'count:32',
2554 'channel': 'Lofi Girl',
2555 'availability': 'public',
2556 'upload_date': '20220712',
2557 'uploader_id': '@LofiGirl',
2559 'params': {'skip_download': True},
2560 }, {
2561 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2562 'info_dict': {
2563 'id': 'tjjjtzRLHvA',
2564 'ext': 'mp4',
2565 'title': 'ハッシュタグ無し };if window.ytcsi',
2566 'upload_date': '20220323',
2567 'like_count': int,
2568 'availability': 'unlisted',
2569 'channel': 'Lesmiscore',
2570 'thumbnail': r're:^https?://.*\.jpg',
2571 'age_limit': 0,
2572 'categories': ['Music'],
2573 'view_count': int,
2574 'description': '',
2575 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2576 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2577 'live_status': 'not_live',
2578 'playable_in_embed': True,
2579 'channel_follower_count': int,
2580 'duration': 6,
2581 'tags': [],
2582 'uploader_id': '@lesmiscore',
2583 'uploader': 'Lesmiscore',
2584 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2585 'timestamp': 1648005313,
2587 }, {
2588 # Prefer primary title+description language metadata by default
2589 # Do not prefer translated description if primary is empty
2590 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2591 'info_dict': {
2592 'id': 'el3E4MbxRqQ',
2593 'ext': 'mp4',
2594 'title': 'dlp test video 2 - primary sv no desc',
2595 'description': '',
2596 'channel': 'cole-dlp-test-acc',
2597 'tags': [],
2598 'view_count': int,
2599 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2600 'like_count': int,
2601 'playable_in_embed': True,
2602 'availability': 'unlisted',
2603 'thumbnail': r're:^https?://.*\.jpg',
2604 'age_limit': 0,
2605 'duration': 5,
2606 'live_status': 'not_live',
2607 'upload_date': '20220908',
2608 'categories': ['People & Blogs'],
2609 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2610 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2611 'uploader_id': '@coletdjnz',
2612 'uploader': 'cole-dlp-test-acc',
2613 'timestamp': 1662677394,
2615 'params': {'skip_download': True},
2616 }, {
2617 # Extractor argument: prefer translated title+description
2618 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2619 'info_dict': {
2620 'id': 'gHKT4uU8Zng',
2621 'ext': 'mp4',
2622 'channel': 'cole-dlp-test-acc',
2623 'tags': [],
2624 'duration': 5,
2625 'live_status': 'not_live',
2626 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2627 'upload_date': '20220729',
2628 'view_count': int,
2629 'categories': ['People & Blogs'],
2630 'thumbnail': r're:^https?://.*\.jpg',
2631 'title': 'dlp test video title translated (fr)',
2632 'availability': 'public',
2633 'age_limit': 0,
2634 'description': 'dlp test video description translated (fr)',
2635 'playable_in_embed': True,
2636 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2637 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2638 'uploader_id': '@coletdjnz',
2639 'uploader': 'cole-dlp-test-acc',
2640 'timestamp': 1659073275,
2641 'like_count': int,
2643 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2644 'expected_warnings': [r'Preferring "fr" translated fields'],
2645 }, {
2646 'note': '6 channel audio',
2647 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2648 'only_matching': True,
2649 }, {
2650 'note': 'Multiple HLS formats with same itag',
2651 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2652 'info_dict': {
2653 'id': 'kX3nB4PpJko',
2654 'ext': 'mp4',
2655 'categories': ['Entertainment'],
2656 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2657 'live_status': 'not_live',
2658 'duration': 937,
2659 'channel_follower_count': int,
2660 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2661 'title': 'Last To Take Hand Off Jet, Keeps It!',
2662 'channel': 'MrBeast',
2663 'playable_in_embed': True,
2664 'view_count': int,
2665 'upload_date': '20221112',
2666 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2667 'age_limit': 0,
2668 'availability': 'public',
2669 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2670 'like_count': int,
2671 'tags': [],
2672 'uploader': 'MrBeast',
2673 'uploader_url': 'https://www.youtube.com/@MrBeast',
2674 'uploader_id': '@MrBeast',
2675 'comment_count': int,
2676 'channel_is_verified': True,
2677 'heatmap': 'count:100',
2679 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2680 }, {
2681 'note': 'Audio formats with Dynamic Range Compression',
2682 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2683 'info_dict': {
2684 'id': 'Tq92D6wQ1mg',
2685 'ext': 'webm',
2686 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2687 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2688 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2689 'channel_follower_count': int,
2690 'description': 'md5:17eccca93a786d51bc67646756894066',
2691 'upload_date': '20191228',
2692 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2693 'playable_in_embed': True,
2694 'like_count': int,
2695 'categories': ['Entertainment'],
2696 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2697 'age_limit': 18,
2698 'channel': 'Projekt Melody',
2699 'view_count': int,
2700 'availability': 'needs_auth',
2701 'comment_count': int,
2702 'live_status': 'not_live',
2703 'duration': 106,
2704 'uploader': 'Projekt Melody',
2705 'uploader_id': '@ProjektMelody',
2706 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2707 'timestamp': 1577508724,
2709 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2712 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2713 'info_dict': {
2714 'id': 'qVv6vCqciTM',
2715 'ext': 'mp4',
2716 'age_limit': 0,
2717 'comment_count': int,
2718 'chapters': 'count:13',
2719 'upload_date': '20221223',
2720 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2721 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2722 'like_count': int,
2723 'release_date': '20221223',
2724 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2725 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2726 'view_count': int,
2727 'playable_in_embed': True,
2728 'duration': 4438,
2729 'availability': 'public',
2730 'channel_follower_count': int,
2731 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2732 'categories': ['Entertainment'],
2733 'live_status': 'was_live',
2734 'release_timestamp': 1671793345,
2735 'channel': 'さなちゃんねる',
2736 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2737 'uploader': 'さなちゃんねる',
2738 'uploader_url': 'https://www.youtube.com/@sana_natori',
2739 'uploader_id': '@sana_natori',
2740 'channel_is_verified': True,
2741 'heatmap': 'count:100',
2742 'timestamp': 1671798112,
2746 # Fallbacks when webpage and web client is unavailable
2747 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2748 'info_dict': {
2749 'id': 'wSSmNUl9Snw',
2750 'ext': 'mp4',
2751 # 'categories': ['Science & Technology'],
2752 'view_count': int,
2753 'chapters': 'count:2',
2754 'channel': 'Scott Manley',
2755 'like_count': int,
2756 'age_limit': 0,
2757 # 'availability': 'public',
2758 'channel_follower_count': int,
2759 'live_status': 'not_live',
2760 'upload_date': '20170831',
2761 'duration': 682,
2762 'tags': 'count:8',
2763 'uploader_url': 'https://www.youtube.com/@scottmanley',
2764 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2765 'uploader': 'Scott Manley',
2766 'uploader_id': '@scottmanley',
2767 'title': 'The Computer Hack That Saved Apollo 14',
2768 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2769 'thumbnail': r're:^https?://.*\.webp',
2770 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2771 'playable_in_embed': True,
2772 'comment_count': int,
2773 'channel_is_verified': True,
2774 'heatmap': 'count:100',
2776 'params': {
2777 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
2782 _WEBPAGE_TESTS = [
2783 # YouTube <object> embed
2785 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2786 'md5': '873c81d308b979f0e23ee7e620b312a3',
2787 'info_dict': {
2788 'id': 'msN87y-iEx0',
2789 'ext': 'mp4',
2790 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2791 'upload_date': '20080526',
2792 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2793 'age_limit': 0,
2794 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2795 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2796 'playable_in_embed': True,
2797 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2798 'like_count': int,
2799 'comment_count': int,
2800 'channel': 'Christopher Sykes',
2801 'live_status': 'not_live',
2802 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2803 'availability': 'public',
2804 'duration': 195,
2805 'view_count': int,
2806 'categories': ['Science & Technology'],
2807 'channel_follower_count': int,
2808 'uploader': 'Christopher Sykes',
2809 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
2810 'uploader_id': '@ChristopherSykesDocumentaries',
2811 'heatmap': 'count:100',
2812 'timestamp': 1211825920,
2814 'params': {
2815 'skip_download': True,
@classmethod
def suitable(cls, url):
    """Report whether this extractor accepts *url*.

    A URL whose query string carries a non-empty ``list`` value is rejected
    outright; for every other URL the decision is left to the parent class.
    """
    # The original performs this import inside the function; keep it local.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    has_list_value = bool(list_values[0])
    return False if has_list_value else super().suitable(url)
def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # Results/exceptions memoised by _cached(), keyed by the cache-id tuple
    self._player_cache = {}
    # Downloaded player JS, keyed by player id (filled by _load_player())
    self._code_cache = {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to the formats flagged ``is_from_start``.

    Only the dicts in `formats` that have a truthy ``is_from_start`` entry are
    processed; each of them is modified in place (``is_live``, ``fragments``
    and, while live, ``protocol``).  Nothing is returned.
    """
    # Serialises manifest refetches requested through mpd_feed()
    lock = threading.Lock()
    # Time of the last (re)fetch of the format list
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and rebuild `formats`, but only once
        # more than `delay` seconds have passed since the previous fetch.
        # `format_id` is accepted but not used here.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        Refresh the format list if it is older than `delay` seconds and look up `format_id` in it.

        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with lock:
                refetch_manifest(format_id, delay)

            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                # Record why the lookup failed and let the retry manager decide
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    for f in formats:
        f['is_live'] = is_live
        # The 5th argument (manifestless_orig_fmt) is False while live,
        # otherwise a copy of the format dict itself
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            # Hand over the generator factory itself; it is called later with a ctx
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            # Not live: call the generator now with an empty ctx, evaluated lazily
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']
2882 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
2883 FETCH_SPAN, MAX_DURATION = 5, 432000
2885 mpd_url, stream_number, is_live = None, None, True
2887 begin_index = 0
2888 download_start_time = ctx.get('start') or time.time()
2890 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2891 if lack_early_segments:
2892 self.report_warning(bug_reports_message(
2893 'Starting download from the last 120 hours of the live stream since '
2894 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2895 lack_early_segments = True
2897 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2898 fragments, fragment_base_url = None, None
2900 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2901 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2902 # Obtain from MPD's maximum seq value
2903 old_mpd_url = mpd_url
2904 last_error = ctx.pop('last_error', None)
2905 expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
2906 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2907 or (mpd_url, stream_number, False))
2908 if not refresh_sequence:
2909 if expire_fast and not is_live:
2910 return False, last_seq
2911 elif old_mpd_url == mpd_url:
2912 return True, last_seq
2913 if manifestless_orig_fmt:
2914 fmt_info = manifestless_orig_fmt
2915 else:
2916 try:
2917 fmts, _ = self._extract_mpd_formats_and_subtitles(
2918 mpd_url, None, note=False, errnote=False, fatal=False)
2919 except ExtractorError:
2920 fmts = None
2921 if not fmts:
2922 no_fragment_score += 2
2923 return False, last_seq
2924 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2925 fragments = fmt_info['fragments']
2926 fragment_base_url = fmt_info['fragment_base_url']
2927 assert fragment_base_url
2929 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2930 return True, _last_seq
2932 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
2933 while is_live:
2934 fetch_time = time.time()
2935 if no_fragment_score > 30:
2936 return
2937 if last_segment_url:
2938 # Obtain from "X-Head-Seqnum" header value from each segment
2939 try:
2940 urlh = self._request_webpage(
2941 last_segment_url, None, note=False, errnote=False, fatal=False)
2942 except ExtractorError:
2943 urlh = None
2944 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2945 if last_seq is None:
2946 no_fragment_score += 2
2947 last_segment_url = None
2948 continue
2949 else:
2950 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2951 no_fragment_score += 2
2952 if not should_continue:
2953 continue
2955 if known_idx > last_seq:
2956 last_segment_url = None
2957 continue
2959 last_seq += 1
2961 if begin_index < 0 and known_idx < 0:
2962 # skip from the start when it's negative value
2963 known_idx = last_seq + begin_index
2964 if lack_early_segments:
2965 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2966 try:
2967 for idx in range(known_idx, last_seq):
2968 # do not update sequence here or you'll get skipped some part of it
2969 should_continue, _ = _extract_sequence_from_mpd(False, False)
2970 if not should_continue:
2971 known_idx = idx - 1
2972 raise ExtractorError('breaking out of outer loop')
2973 last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
2974 yield {
2975 'url': last_segment_url,
2976 'fragment_count': last_seq,
2978 if known_idx == last_seq:
2979 no_fragment_score += 5
2980 else:
2981 no_fragment_score = 0
2982 known_idx = last_seq
2983 except ExtractorError:
2984 continue
2986 if manifestless_orig_fmt:
2987 # Stop at the first iteration if running for post-live manifestless;
2988 # fragment count no longer increase since it starts
2989 break
2991 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of `ytcfgs`, or None.

    The first string found under ``PLAYER_JS_URL``, then under
    ``WEB_PLAYER_CONTEXT_CONFIGS -> * -> jsUrl``, is joined onto
    https://www.youtube.com.  `webpage` is accepted but not used here.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    if found:
        return urljoin('https://www.youtube.com', found)
    return None
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version referenced by the iframe API script.

    Returns None when the download yields nothing or no version is found
    (with `fatal` forwarded to both the download and the regex search).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    # The version is the 8 hex digits following "player/" (slashes may be escaped)
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
3011 def _signature_cache_id(self, example_sig):
3012 """ Return a string representation of a signature """
3013 return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first `_PLAYER_INFO_RE` pattern matching `player_url`.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy generator: patterns are tried in order and only until one matches
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    first_match = next(filter(None, attempts), None)
    if first_match is None:
        raise ExtractorError(f'Cannot identify player {player_url!r}')
    return first_match.group('id')
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for `player_url`, downloading it at most once per player id.

    A successful download is kept in `self._code_cache`; when nothing could
    be downloaded the result is None.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote=f'Download of {player_url} failed')
    if downloaded:
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that rearranges a signature string.

    The rearrangement is stored as a list of source indices ("cache spec")
    under the 'youtube-sigfuncs' cache section.  If no spec is cached, the
    player JS is loaded, its signature function is run once on a probe
    string, and the resulting index list is stored.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        player_code = self._load_player(video_id, player_url)
        if player_code:
            js_sig_func = self._parse_sig_js(player_code)
            # Probe string whose i-th character has code point i, so the
            # output characters reveal which input position they came from
            probe = ''.join(map(chr, range(len(example_sig))))
            cache_spec = [ord(ch) for ch in js_sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
3056 def _print_sig_code(self, func, example_sig):
3057 if not self.get_param('youtube_print_sig_code'):
3058 return
3060 def gen_sig_code(idxs):
3061 def _genslice(start, end, step):
3062 starts = '' if start == 0 else str(start)
3063 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
3064 steps = '' if step == 1 else (':%d' % step)
3065 return f's[{starts}{ends}{steps}]'
3067 step = None
3068 # Quelch pyflakes warnings - start will be set when step is set
3069 start = '(Never used)'
3070 for i, prev in zip(idxs[1:], idxs[:-1]):
3071 if step is not None:
3072 if i - prev == step:
3073 continue
3074 yield _genslice(start, prev, step)
3075 step = None
3076 continue
3077 if i - prev in [-1, 1]:
3078 step = i - prev
3079 start = prev
3080 continue
3081 else:
3082 yield 's[%d]' % prev
3083 if step is None:
3084 yield 's[%d]' % i
3085 else:
3086 yield _genslice(start, i, step)
3088 test_string = ''.join(map(chr, range(len(example_sig))))
3089 cache_res = func(test_string)
3090 cache_spec = [ord(c) for c in cache_res]
3091 expr_code = ' + '.join(gen_sig_code(cache_spec))
3092 signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
3093 code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
3094 f' return {expr_code}\n')
3095 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Locate the signature function in player JS and wrap it as a Python callable.

    The function name is found with the first matching pattern below (group
    ``sig``); the returned callable takes one string and passes it to the
    interpreted JS function as its single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(funcname)

    def call_with_signature(s):
        return js_function([s])

    return call_with_signature
3118 def _cached(self, func, *cache_id):
3119 def inner(*args, **kwargs):
3120 if cache_id not in self._player_cache:
3121 try:
3122 self._player_cache[cache_id] = func(*args, **kwargs)
3123 except ExtractorError as e:
3124 self._player_cache[cache_id] = e
3125 except Exception as e:
3126 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3128 ret = self._player_cache[cache_id]
3129 if isinstance(ret, Exception):
3130 raise ret
3131 return ret
3132 return inner
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # One signature function is memoised per (player_url, signature shape)
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    get_sig_func = self._cached(self._extract_signature_function, *cache_key)
    sig_func = get_sig_func(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    @param s            The encrypted n value
    @param video_id     Used for log messages and passed to the helpers
    @param player_url   Player JS URL; joined onto https://www.youtube.com
    @returns            The decrypted value
    @raises ExtractorError if player_url is None or the n function code cannot be extracted
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The built function is memoised per player_url via _cached()
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        # Native interpretation failed: fall back to PhantomJS; if the wrapper
        # cannot be created, re-raise the original interpreter error
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        # func_code is an (argument names, function body) pair
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
3176 def _extract_n_function_name(self, jscode):
3177 funcname, idx = self._search_regex(
3178 r'''(?x)
3180 \.get\("n"\)\)&&\(b=|
3182 b=String\.fromCharCode\(110\)|
3183 ([a-zA-Z0-9$.]+)&&\(b="nn"\[\+\1\]
3184 ),c=a\.get\(b\)\)&&\(c=
3186 (?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
3187 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
3188 if not idx:
3189 return funcname
3191 return json.loads(js_to_json(self._search_regex(
3192 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
3193 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the n function of a player.

    `func_code` comes from the 'youtube-nsig' cache section when available
    (entries older than version 2024.07.09 are not accepted); otherwise it is
    extracted from the downloaded player JS and stored in that section.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
    if cached_code:
        # As in the cache-miss branch an interpreter is returned; here it is
        # constructed from the cached function code itself
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3211 def _extract_n_function_from_code(self, jsi, func_code):
3212 func = jsi.extract_function_from_code(*func_code)
3214 def extract_nsig(s):
3215 try:
3216 ret = func([s])
3217 except JSInterpreter.Exception:
3218 raise
3219 except Exception as e:
3220 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3222 if ret.startswith('enhanced_except_'):
3223 raise JSInterpreter.Exception('Signature function returned an exception')
3224 return ret
3226 return extract_nsig
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of `ytcfg` is preferred; otherwise a 5-digit value is
    searched for in the player JS.  Without a `player_url` this either raises
    ExtractorError (`fatal`) or warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs from `player_responses` to mark the video as watched.

    Two URLs are handled in order: the playback URL, then the watchtime URL
    ("fully" watched).  If either is missing, a warning is reported and the
    method returns without processing the remaining one.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = (('videostatsPlaybackUrl', False), ('videostatsWatchtimeUrl', True))
    for tracking_key, is_full in tracking_keys:
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', tracking_key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_result entries for YouTube videos referenced in `webpage`.

    If an alternate-link to a youtube.com watch URL is present, only that
    result is yielded and `cls.StopExtraction` is raised.  Otherwise the
    parent class' results come first, followed by lazyYT embeds and
    "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazyyt_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazyyt_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # findall returns a tuple per match; the video id is the last group
        imported_id = groups[-1]
        yield cls.url_result(imported_id, cls, imported_id)
@classmethod
def extract_id(cls, url):
    """Return the video id for `url` as given by `get_temp_id`.

    Raises ExtractorError if no id is obtained.
    """
    if video_id := cls.get_temp_id(url):
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in `data`.

    Start times are read from ``timeRangeStartMillis`` (converted to seconds)
    and titles from ``title.simpleText`` of each ``chapterRenderer``; the
    actual assembly is delegated to `_extract_chapters_helper`.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_start(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro-markers lists of the engagement panels in `data`.

    Each list is tried in order; the first one for which
    `_extract_chapters_helper` gives a non-empty result wins.  An empty list
    is returned when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
def _extract_heatmap(self, data):
    """Return the heatmap markers found in `data` as a list of dicts, or None.

    Only mutations whose markers list has type 'MARKER_TYPE_HEATMAP' are
    considered.  Each marker is mapped to ``start_time`` and ``end_time``
    (milliseconds divided by 1000) and ``value``.
    """
    def is_heatmap_mutation(_, mutation):
        markers_list = mutation['payload']['macroMarkersListEntity']['markersList']
        return markers_list['markerType'] == 'MARKER_TYPE_HEATMAP'

    def marker_end_seconds(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {marker_end_seconds},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap_path = (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        is_heatmap_mutation,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ...,
        marker_fields,
    )
    return traverse_obj(data, heatmap_path) or None
def _extract_comment(self, entities, parent=None):
    """Build a comment info dict from entity payloads, or return None without a comment id.

    `entities` is searched for a ``commentEntityPayload`` (comment data) and
    an ``engagementToolbarStateEntityPayload`` (heart state).  `parent` is
    stored as the comment's parent, defaulting to 'root'.
    """
    comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return

    toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''

    info = {
        'id': comment_id,
        'parent': parent or 'root',
    }
    info.update(traverse_obj(comment_entity_payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
        ), {lambda x: urljoin('https://www.youtube.com', x)}),
    }, get_all=False))

    # Unknown (None) when there is no toolbar payload at all
    if toolbar_entity_payload is None:
        info['is_favorited'] = None
    else:
        info['is_favorited'] = toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'

    info['_time_text'] = time_text  # FIXME: non-standard, but we need a way of showing that it is an estimate.
    info['timestamp'] = self._parse_time_text(time_text)
    return info
def _extract_comment_old(self, comment_renderer, parent=None):
    """Build a comment info dict from a comment renderer dict, or return None without a comment id.

    `parent` is stored as the comment's parent, defaulting to 'root'.  The
    optional keys ``author_is_uploader``, ``is_favorited``,
    ``author_is_verified`` and ``is_pinned`` are only set when the renderer
    provides the corresponding data.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    # Timestamp is an estimate calculated from the current time and time_text
    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
    }

    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    info['_time_text'] = time_text
    info['timestamp'] = self._parse_time_text(time_text)

    author_path = traverse_obj(comment_renderer, ('authorEndpoint', (
        ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
        expected_type=str, get_all=False)
    info['author_url'] = urljoin('https://www.youtube.com', author_path)

    if (author_is_uploader := traverse_obj(comment_renderer, 'authorIsChannelOwner')) is not None:
        info['author_is_uploader'] = author_is_uploader

    comment_abr = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if comment_abr is not None:
        info['is_favorited'] = 'creatorHeart' in comment_abr

    badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or a reply thread.

    Called without `parent` for the top-level comment section; it calls itself
    recursively (with `parent` set to the comment ID) for each reply thread.

    @param root_continuation_data  Renderer data the first continuation is extracted from
    @param ytcfg                   ytcfg used to generate the API request headers
    @param video_id                Video ID (used for the forced continuation and warnings)
    @param parent                  ID of the parent comment when extracting replies, else None
    @param tracker                 Dict of counters/ID sets shared across the recursive calls;
                                   created here when not given
    @raises self.CommentsDisabled  if a message renderer is present, this is the top-level
                                   call and no comment has been extracted
    """

    # First value of an extractor argument, or '' when the argument is not given
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Read the comments header: record the estimated total and return the
        continuation of the requested sort order (None if none was found)"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen(f'Sorting comments by {sort_text.lower()}')
            break
        return _continuation

    def extract_thread(contents, entity_payloads):
        """Yield the comments (and their replies) found in `contents`.

        A bare `yield` (None) is used as a stop signal: the consuming loop
        below returns as soon as it receives a falsy entry.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            # Parent comment limit reached: signal the caller to stop
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            # old comment format
            if not entity_payloads:
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment_old(comment_renderer, parent)

            # new comment format
            else:
                view_model = (
                    traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not comment_keys:
                    continue
                # Select the entity mutations that belong to this comment
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

            if not comment:
                continue
            comment_id = comment['id']

            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by both the per-thread limit and what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The max_comments extractor argument holds up to four values; values that are missing
    # or not integers become sys.maxsize. max_comments itself is not used in this function
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    # One iteration per API page; stops once a page yields no further continuation
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        # Entity mutations carry the new-format comment data; empty for the old format
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response only provides the header (count and sort menu)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                # A falsy entry is the stop signal from extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Nothing was extracted for the top-level section: surface YouTube's message, if any
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3643 @staticmethod
3644 def _generate_comment_continuation(video_id):
3646 Generates initial comment section continuation token from given video id
3648 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3649 return base64.b64encode(token.encode()).decode()
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry point for comment extraction

    Returns a lazy iterator over the comment dicts, limited to the first
    value of the max_comments extractor argument when that is an integer.
    """
    def _real_comment_extract(contents):
        # Find the item section that holds the comments (None if there is none)
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    first_limit = self._configuration_arg('max_comments', [''])[0]
    max_comments = int_or_none(first_limit)
    all_comments = _real_comment_extract(contents)
    return itertools.islice(all_comments, 0, max_comments)
3662 @staticmethod
3663 def _get_checkok_params():
3664 return {'contentCheckOk': True, 'racyCheckOk': True}
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player API query.

    @param sts  Signature timestamp; only included when it is not None
    @returns    Dict with 'playbackContext' followed by the check-ok parameters
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {'contentPlaybackContext': playback_context},
    }
    player_context.update(cls._get_checkok_params())
    return player_context
@staticmethod
def _is_agegated(player_response):
    """Return True if the player response indicates that the video is age-gated.

    A response counts as age-gated when it has a truthy
    'desktopLegacyAgeGateReason', or when its playability status or reason
    contains one of the known age-gate markers (compared case-insensitively).

    @param player_response  Player response dict
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    # Keep only string values: anything else cannot be substring-matched
    # and would raise TypeError in the test below
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason'), {str}))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase while status values are upper-case constants
    # (cf. 'UNPLAYABLE', AGE_VERIFICATION_REQUIRED), so a case-sensitive test
    # could never match the status markers
    return any(expected in reason.lower() for reason in reasons for expected in AGE_GATE_REASONS)
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response of `video_id` for the given client.

    The signature timestamp is only extracted (non-fatally) when a player URL
    is given. Player params come from the player_params extractor argument,
    falling back to the client's PLAYER_PARAMS.

    @returns  The player response, or None if the response is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    client_key = _split_innertube_client(client)[0]
    default_pp = traverse_obj(INNERTUBE_CLIENTS, (client_key, 'PLAYER_PARAMS', {str}))
    player_params = self._configuration_arg('player_params', [default_pp], casesense=True)[0]

    yt_query = {'videoId': video_id}
    if player_params:
        yt_query['params'] = player_params
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading {} player API JSON'.format(client_label),
    )
    return response or None
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the player_client extractor argument into an ordered, de-duplicated client list.

    'default' and 'all' are expanded, unsupported names are skipped with a
    warning, and clients listed in _BROKEN_CLIENTS are moved to the end.
    For music URLs the `<base>_music` variant of each client is added when it exists.
    """
    fallback = ['ios', 'web']
    # Clients starting with an underscore cannot be requested by the user
    selectable = sorted(
        (name for name in INNERTUBE_CLIENTS if not name.startswith('_')),
        key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients, deprioritized = [], []
    for name in self._configuration_arg('player_client'):
        if name == 'default':
            requested_clients.extend(fallback)
            continue
        if name == 'all':
            requested_clients.extend(selectable)
            continue
        if name not in selectable:
            self.report_warning(f'Skipping unsupported client {name}')
            continue
        if name in self._BROKEN_CLIENTS.values():
            deprioritized.append(name)
        else:
            requested_clients.append(name)

    # Force deprioritization of _BROKEN_CLIENTS for format de-duplication
    requested_clients.extend(deprioritized)
    requested_clients = requested_clients or fallback

    is_music = smuggled_data.get('is_music_url') or self.is_music_url(url)
    if is_music:
        # The list grows while it is iterated, so the appended music clients are
        # visited too; they are skipped by the variant check
        for requested_client in requested_clients:
            _, base_client, variant = _split_innertube_client(requested_client)
            music_client = f'{base_client}_music'
            if variant == 'music' or music_client not in INNERTUBE_CLIENTS:
                continue
            requested_clients.append(music_client)

    return orderedSet(requested_clients)
def _invalid_player_response(self, pr, video_id):
    """Return the video ID found in the player response if it differs from `video_id`, else None"""
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    actual_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    if actual_id == video_id:
        return None
    return actual_id
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect the player responses of all requested clients.

    @param clients        Client names to request, in order of preference
    @param video_id       ID of the video; responses for another video are skipped
    @param webpage        Watch page HTML (may be falsy); source of the initial player response
    @param master_ytcfg   ytcfg of the webpage
    @param smuggled_data  Passed through to _extract_player_response
    @returns              Tuple (list of player responses, player URL or None)
    @raises ExtractorError  if no usable player response could be obtained
    """
    initial_pr = ignore_initial_response = None
    if webpage:
        if 'web' in clients:
            experiments = traverse_obj(master_ytcfg, (
                'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...))
            # The initial response is only distrusted when every poToken experiment ID is present
            if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
                self.report_warning(
                    'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response')
                ignore_initial_response = True
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Reversed so that clients.pop() below processes the clients in the requested order
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    tried_iframe_fallback = False
    player_url = None
    # Maps client -> video ID of the wrong player response it returned
    skipped_clients = {}
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = {}
        if client == 'web':
            player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg
        elif 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for an earlier client is reused
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback player URL download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        pr = initial_pr if client == 'web' and not ignore_initial_response else None
        for retry in self.RetryManager(fatal=False):
            try:
                pr = pr or self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
            except ExtractorError as e:
                self.report_warning(e)
                break
            experiments = traverse_obj(pr, (
                'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK',
                'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...))
            # Discard the response and flag an error on the retry manager when the experiment is detected
            if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
                pr = None
                retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True)
        if not pr:
            continue

        if pr_id := self._invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        elif pr:
            # Save client name for introspection later
            name = short_client_name(client)
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = name
            for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                f[STREAMING_DATA_CLIENT_NAME] = name
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'tv_embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif variant != 'tv_embedded' and self._is_agegated(pr):
            if self.is_authenticated:
                append_client(f'{base_client}_creator')
            append_client(f'tv_embedded.{base_client}')

    if skipped_clients:
        self.report_warning(
            f'Skipping player responses from {"/".join(skipped_clients)} clients '
            f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url
3862 def _needs_live_processing(self, live_status, duration):
3863 if (live_status == 'is_live' and self.get_param('live_from_start')
3864 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
3865 return live_status
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generator yielding the format dicts of a video, followed by its subtitles.

    Formats listed directly in the streaming data are yielded first, then the
    formats of the HLS and DASH manifests. The very last item yielded is the
    subtitles dict merged from the manifests.

    @param streaming_data  List of 'streamingData' dicts, one per player response
    @param video_id        Video ID (used for decryption and messages)
    @param player_url      Player JS URL needed to decrypt signatures and nsig, or None
    @param live_status     Live status of the video
    @param duration        Duration of the video in seconds, or None
    """
    CHUNK_SIZE = 10 << 20
    PREFERRED_LANG_VALUE = 10
    original_language = None
    # itags: itag -> set of (protocol, language) already seen; stream_ids: direct streams already yielded
    itags, stream_ids = collections.defaultdict(set), []
    # Qualities seen per itag and per height; used to rate the manifest formats
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        """Lazily build one fragment per CHUNK_SIZE byte range of the format"""
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
            }),
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        # Unless duplicates are requested, only the first occurrence of a stream is kept
        if not all_formats:
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality

        is_default = audio_track.get('audioIsDefault')
        is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
        language_code = audio_track.get('id', '').split('.')[0]
        if language_code and is_default:
            original_language = language_code

        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: rebuild it from the signature cipher, which needs the player to decrypt
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&{}={}'.format(
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url),
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        if query.get('n'):
            # Replace the 'n' query parameter with its decrypted value; the format is dropped on failure
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url),
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: Some formats may be missing',
                        video_id=video_id, only_once=True)
                continue

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_call(lambda: format_duration < duration // 2)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
        # _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds
        # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
        is_broken = client_name in self._BROKEN_CLIENTS
        if is_broken:
            self.report_warning(
                f'{video_id}: {self._BROKEN_CLIENTS[client_name]} client formats are broken '
                'and may yield HTTP Error 403. They will be deprioritized', only_once=True)

        # NOTE(review): quality can still be None here when the format has neither 'quality'
        # nor 'audioQuality'; quality.replace would then raise if 'qualityLabel' is missing - confirm
        name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                is_damaged and 'DAMAGED', is_broken and 'BROKEN',
                (self.get_param('verbose') or all_formats) and client_name,
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            # DRC variants rank half a step below the same quality without DRC
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'filesize_approx': filesize_from_tbr(tbr, format_duration),
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
            'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
            # Strictly de-prioritize broken, damaged and 3gp formats
            'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
        }
        # Split the mime type into container type and optional codecs string
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        # "dashy" variant: the same stream split into byte-range fragments (needs a known filesize)
        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag):
        """Adjust a manifest format in place; return False if it duplicates an already seen format"""
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        # Suffix the format ID with the protocol when the itag was also seen with another protocol
        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if original_language and f.get('language') == original_language:
            f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
            f['language_preference'] = PREFERRED_LANG_VALUE

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        # Rate by the quality recorded for the itag, else by the recorded height closest to this format's
        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd.get(STREAMING_DATA_CLIENT_NAME)

        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                # The itag of an HLS format is taken from its URL
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles dict is always the final item of this generator
    yield subtitles
def _extract_storyboard(self, player_responses, duration):
    """Yield one mhtml "storyboard" format dict per level of the storyboard spec.

    The spec string is taken from `storyboards.playerStoryboardSpecRenderer.spec`
    of the player responses. It is '|'-separated: the first field is the URL
    template, every following field describes one storyboard level as eight
    '#'-separated values. Levels that do not have exactly eight values, or
    whose first five values are not non-zero integers, are reported with a
    warning and skipped.

    `duration` is used as a divisor/dividend for the fps and the per-fragment
    durations, so it is assumed to be a non-zero number here.
    """
    spec_fields = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')
    # Reversed so that the URL template (the first field) can be popped off the end
    spec_fields.reverse()
    url_template = url_or_none(urljoin('https://i.ytimg.com/', spec_fields.pop() or None))
    if not url_template:
        return

    last_index = len(spec_fields) - 1
    for index, level_spec in enumerate(spec_fields):
        parts = level_spec.split('#')
        counts = [int_or_none(part) for part in parts[:5]]
        if len(parts) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {index}: {"#".join(parts)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name_pattern, signature = parts[6:]

        # $L and $N are filled in per level; $M is filled in per fragment below
        level_url = (
            url_template.replace('$L', str(last_index - index)).replace('$N', name_pattern)
            + f'&sigh={signature}')
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        fragments = [{
            'url': level_url.replace('$M', str(number)),
            # The final fragment may cover less than a full fragment_duration
            'duration': min(fragment_duration, duration - (number * fragment_duration)),
        } for number in range(math.ceil(fragment_count))]

        yield {
            'format_id': f'sb{index}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': level_url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The webpage download is skipped when 'webpage' is listed in the
    `player_skip` extractor argument. The ytcfg is extracted from the webpage
    and falls back to the default ytcfg.

    Returns a tuple `(webpage, master_ytcfg, player_responses, player_url)`;
    `webpage` stays None when the download was skipped.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')

    webpage = None
    if not skip_webpage:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        player_params = self._configuration_arg('player_params', [None], casesense=True)[0]
        if player_params:
            webpage_query['pp'] = player_params
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=webpage_query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status and extract formats and manifest subtitles.

    Returns a tuple
    `(live_broadcast_details, live_status, streaming_data, formats, subtitles)`
    where `subtitles` is the last item produced by
    `_extract_formats_and_subtitles` and `formats` is everything before it.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first condition that holds decides the status
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    # If there are no formats that definitely don't have DRM, all have DRM
    any_drm_free = any(not fmt.get('has_drm') for fmt in formats)
    if not any_drm_free:
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Depending on the player responses this returns:
      * a `url_result` pointing at the trailer, when the playability status
        carries a `trailerVideoId`;
      * a `playlist_result` of the individual feeds for multifeed videos
        (unless `noplaylist` is set or the URL was smuggled with
        `force_singlefeed`);
      * otherwise the complete info dict of the video.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict)

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search, so always answer None
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict)

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    # NB: This parses as `(A and B or C) if C is not None else B`
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += f' ({feed_title})'
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '{}watch?v={}'.format(base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
                    ', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` can be None when only a subreason is present;
            # appending to it unconditionally would raise TypeError
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    # Only the first valid yt:stretch keyword is applied
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails found in the data, taken before the guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names get a higher preference; webp beats jpg of the same name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = self.ucid_or_none(str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId')))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    channel_handle = self.handle_from_url(owner_profile_url)

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
        '_format_sort_fields': (  # source_preference is lower for potentially damaged formats
            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Parsed only after the guard above so that a missing baseUrl is never parsed
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr' and trans_code != 'und':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                if lang_code == f'a-{orig_trans_code}':
                    # Set audio language based on original subtitles
                    for f in formats:
                        if f.get('acodec') != 'none' and not f.get('language'):
                            f['language'] = orig_trans_code
                    # Add an "-orig" label to the original language so that it can be distinguished.
                    # The subs are returned without "-orig" as well for compatibility
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

        info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # start_time/end_time can be given in either the URL fragment or the query; the fragment is checked first
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
        # XXX: Causes catastrophic backtracking if description has "·"
        # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
        # Simulating atomic groups:  (?P<a>[^xy]+)x  =>  (?=(?P<a>[^xy]+))(?P=a)x
        # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
        # NB: The pattern is in verbose mode, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?=(?P<track>[^\n·]+))(?P=track)·
                (?=(?P<artist>[^\n]+))(?P=artist)\n+
                (?=(?P<album>[^\n]+))(?P=album)\n
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*
                    (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
                )?.+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artists': ([a] if (a := mobj.group('clean_artist'))
                            else [a.strip() for a in mobj.group('artist').split('·')]),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
        if not traverse_obj(initial_data, 'contents'):
            self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
            initial_data = None
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query, check_get_keys='contents',
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
    ), expected_type=self._get_count, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        info['heatmap'] = self._extract_heatmap(initial_data)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, ('toggleButtonRenderer', ...),
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
            for tbr in tbrs:
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break

        info['like_count'] = traverse_obj(vpir, (
            'videoActions', 'menuRenderer', 'topLevelButtons', ...,
            'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
            'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
            'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)

        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        if not channel_handle:
            channel_handle = self.handle_from_url(
                traverse_obj(vor, (
                    ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
                    (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
                    {str}), get_all=False))

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line between rows means several songs are listed;
        # in that case the album/artist/track rows are not used
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artists'] = [mrr_contents_text] if mrr_contents_text else None
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text
        owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
        if self._has_badge(owner_badges, BadgeType.VERIFIED):
            info['channel_is_verified'] = True

    info.update({
        'uploader': info.get('channel'),
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
    })

    # We only want timestamp IF it has time precision AND a timezone
    # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
    timestamp = (
        parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
        or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
    )
    upload_date = (
        dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
        (
            unified_strdate(get_first(microformats, 'uploadDate'))
            or unified_strdate(search_meta('uploadDate'))
        ))

    # In the case we cannot get the timestamp:
    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    if not upload_date or (not timestamp and live_status in ('not_live', None)):
        # this should be in UTC, as configured in the cookie/client context
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date

    info['upload_date'] = upload_date
    info['timestamp'] = timestamp

    if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
        # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
        upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
        if upload_datetime >= datetime_from_str('today-2days'):
            for fmt in info['formats']:
                if fmt.get('protocol') == 'm3u8_native':
                    fmt['__needs_testing'] = True

    # Mirror the music metadata into the corresponding generic fields
    for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(vpir, 'badges'))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    # NB: `X or False if cond else None` parses as `(X or False) if cond else None`
    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4760 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that forwards smuggled data to the URL results of `func`.

    The wrapped method is called as `func(self, url, smuggled_data)` with the
    unsmuggled URL; `is_music_url` is added to the smuggled data for music
    URLs. Any smuggled data is then smuggled back into the returned result
    and, lazily, into each of its entries.
    """
    def _smuggle(info, smuggled_data):
        # Only results that point at another URL can carry smuggled data
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                url_parts = urllib.parse.urlparse(info['url'])
                if url_parts.netloc in ('www.youtube.com', 'music.youtube.com'):
                    # The flag is encoded in the hostname instead of being smuggled
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(url_parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Each entry gets its own copy since _smuggle may mutate the dict
            result['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result
    return wrapper
4788 @staticmethod
4789 def _extract_basic_item_renderer(item):
4790 # Modified from _extract_grid_item_renderer
4791 known_basic_renderers = (
4792 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
4794 for key, renderer in item.items():
4795 if not isinstance(renderer, dict):
4796 continue
4797 elif key in known_basic_renderers:
4798 return renderer
4799 elif key.startswith('grid') and key.endswith('Renderer'):
4800 return renderer
def _extract_channel_renderer(self, renderer):
    """Build a `url` result for the channel described by a channel renderer.

    `renderer` must contain 'channelId'; all other fields are optional.
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
    title = self._get_text(renderer, 'title')

    endpoint_url = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    channel_handle = (
        self.handle_from_url(endpoint_url)
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        or self.handle_or_none(self._get_text(renderer, 'subscriberCountText')))

    # In search channel renderers YouTube puts the subscriber text in videoCountText
    # (see the note on subscriberCountText above).
    # However, in feed/channels subscriberCountText is set correctly to the subscriber count
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)

    # videoCountText may be the subscriber count, so only trust it
    # when subscriberCountText itself holds a count
    playlist_count = None
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')

    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    is_verified = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'channel_follower_count': follower_count,
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'playlist_count': playlist_count,
        'description': self._get_text(renderer, 'descriptionSnippet'),
        'channel_is_verified': is_verified,
    }
def _grid_entries(self, grid_renderer):
    """Yield a result for every supported item in `grid_renderer['items']`.

    Each item is classified by the first of these that applies: playlist,
    video, channel, or a generic endpoint URL that one of the tab, playlist
    or video extractors accepts. Anything else is skipped.
    """
    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if playlist_id := renderer.get('playlistId'):
            # playlist
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matched_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matched_ie is not None:
                yield self.url_result(
                    ep_url, ie=matched_ie.ie_key(), video_id=matched_ie._match_id(ep_url), video_title=title)
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a url result.

    Checked in order: a video (`playlistItemData.videoId`), a watch endpoint
    with a playlist ID (with or without a video ID), and finally a browse
    endpoint. Returns None when none of them is present.
    """
    def tab_result(result_url, item_id):
        return self.url_result(result_url, ie=YoutubeTabIE.ie_key(), video_id=item_id)

    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id, title=title)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if watch_video_id:
            return tab_result(
                f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}', playlist_id)
        return tab_result(f'https://music.youtube.com/playlist?list={playlist_id}', playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return tab_result(f'https://music.youtube.com/browse/{browse_id}', browse_id)
4899 def _shelf_entries_from_content(self, shelf_renderer):
4900 content = shelf_renderer.get('content')
4901 if not isinstance(content, dict):
4902 return
4903 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4904 if renderer:
4905 # TODO: add support for nested playlists so each shelf is processed
4906 # as separate playlist
4907 # TODO: this includes only first N items
4908 yield from self._grid_entries(renderer)
4909 renderer = content.get('horizontalListRenderer')
4910 if renderer:
4911 # TODO: handle case
4912 pass
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf renderer.

    When the shelf has an endpoint URL, a url_result for it is yielded first
    (unless skip_channels is set and the URL points at a channels listing,
    in which case nothing at all is yielded). The entries found in the
    shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4930 def _playlist_entries(self, video_list_renderer):
4931 for content in video_list_renderer['contents']:
4932 if not isinstance(content, dict):
4933 continue
4934 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4935 if not isinstance(renderer, dict):
4936 continue
4937 video_id = renderer.get('videoId')
4938 if not video_id:
4939 continue
4940 yield self._extract_video(renderer)
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry for a rich item renderer.

    The first of 'videoRenderer', 'reelItemRenderer' or 'playlistRenderer'
    found under 'content' is used: a videoId produces an extracted video,
    otherwise a playlistId produces a playlist url_result.
    """
    item_renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}

    if item_renderer.get('videoId'):
        yield self._extract_video(item_renderer)
        return

    playlist_id = item_renderer.get('playlistId')
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(item_renderer, 'title'))
4958 def _video_entry(self, video_renderer):
4959 video_id = video_renderer.get('videoId')
4960 if video_id:
4961 return self._extract_video(video_renderer)
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for the hashtag page a tile links to, or None without a URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    hashtag_url = urljoin('https://youtube.com', tap_url)
    if not hashtag_url:
        return None
    return self.url_result(
        hashtag_url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    Sources, in order: the attached video, the attached playlist, and video
    URLs linked from the post text. A linked video that is the same as the
    attached one is not yielded a second time.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        linked_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not linked_url or not YoutubeIE.suitable(linked_url):
            continue
        linked_video_id = YoutubeIE._match_id(linked_url)
        if linked_video_id != video_id:
            yield self.url_result(linked_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
5006 def _post_thread_continuation_entries(self, post_thread_continuation):
5007 contents = post_thread_continuation.get('contents')
5008 if not isinstance(contents, list):
5009 return
5010 for content in contents:
5011 renderer = content.get('backstagePostThreadRenderer')
5012 if isinstance(renderer, dict):
5013 yield from self._post_thread_entries(renderer)
5014 continue
5015 renderer = content.get('videoRenderer')
5016 if isinstance(renderer, dict):
5017 yield self._video_entry(renderer)
5019 r''' # unused
5020 def _rich_grid_entries(self, contents):
5021 for content in contents:
5022 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
5023 if video_renderer:
5024 entry = self._video_entry(video_renderer)
5025 if entry:
5026 yield entry
def _report_history_entries(self, renderer):
    """Yield a url_result for every URL linked from the cells of a report history table."""
    linked_url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, linked_url_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under parent_renderer['contents'].

    continuation_list is a one-element list used as an output parameter: it
    is reset to [None] on entry and, while iterating, its single slot is
    overwritten with whatever _extract_continuation returns for the renderer
    that was processed last.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Dispatch table: renderer key -> callable returning an iterable of entries
    renderer_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
        'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section_renderer:
            # Not a section: only rich items and the report history table are handled
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each section item is processed
            for key, renderer in section_item.items():
                handler = renderer_handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The entries embedded in the tab content are yielded first. Afterwards
    continuation pages are requested until no continuation is left, a
    continuation token repeats, a request returns nothing, or a page
    contains no renderer this method knows how to handle.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up `continuation_list` at call time, so it always writes into
        # the list currently bound to that name (it is rebound per page below)
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)

    # renderer key of the first continuation item -> (entry generator, key to wrap the items in)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }
    response_keys = ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')

    continuation = continuation_list[0]
    seen_continuations = set()
    for page_num in itertools.count(1):
        if not continuation:
            break
        token = continuation.get('continuation')
        if token is not None and token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(token)

        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=response_keys)
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems',
        ), 'continuationContents', get_all=False)
        first_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in first_item:
            if key not in known_renderers:
                continue
            entries_func, parent_key = known_renderers[key]
            if parent_key:
                video_items_renderer = {parent_key: continuation_items}
            else:
                video_items_renderer = continuation_items
            continuation_list = [None]
            yield from entries_func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break
5158 @staticmethod
5159 def _extract_selected_tab(tabs, fatal=True):
5160 for tab_renderer in tabs:
5161 if tab_renderer.get('selected'):
5162 return tab_renderer
5163 if fatal:
5164 raise ExtractorError('Unable to find selected tab')
@staticmethod
def _extract_tab_renderers(response):
    """Return the 'tabRenderer'/'expandableTabRenderer' dicts of a two-column browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab.

    The metadata title is suffixed with the selected tab's 'title' and
    'expandedText' (each as ' - <value>' when present); the entries are
    produced lazily by _entries.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for suffix_key in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, suffix_key, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
def _extract_metadata_from_tabs(self, item_id, data):
    """Collect channel/playlist level metadata from a browse response.

    Returns an info dict that always contains 'id' (the channel id when one
    is found, item_id otherwise) together with title, availability,
    thumbnails, counts and channel/uploader fields as far as they can be
    extracted from data.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        info['channel'] = metadata_renderer.get('title')
        info['channel_id'] = traverse_obj(
            metadata_renderer, ('externalId', {self.ucid_or_none}), ('channelUrl', {self.ucid_from_url}))
        if info['channel_id']:
            info['id'] = info['channel_id']

    # pageHeaderViewModel slow rollout began April 2024
    page_header_view_model = traverse_obj(data, (
        'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference,
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = (
        self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
        or self._extract_thumbnails(
            page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
    # Cropped banners rank below the uncropped one appended afterwards
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': (
            self._get_count(data, ('header', ..., 'subscriberCountText'))
            or traverse_obj(page_header_view_model, (
                'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
                lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
                 or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(
            metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))
    if channel_handle:
        info['uploader_id'] = channel_handle
        info['uploader_url'] = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text))

    # For both counts `is None` is tested rather than falsiness: 0 is allowed
    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    if view_count is None:
        view_count = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('channel_id'):
        # No channel metadata available: fall back to the playlist owner
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))),
        })

    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch page) playlist, page by page.

    Starting from the given playlist panel, each further page is requested
    from the 'next' endpoint, continuing from the last video seen so far.
    Iteration stops when a page is missing, has no videos, or ends with the
    video the previous page ended with.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # The follow-up response may not contain a playlist panel at all
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item is not necessarily a playlistPanelVideoRenderer with a
        # watch endpoint; fall back to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents',
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist panel found on a watch page.

    When the panel links to a playlist page other than url, extraction is
    delegated to that page; otherwise (e.g. mix playlists, known unviewable
    playlists) the inline playlist itself is extracted.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns: 'public' when any source says so explicitly, otherwise the
              result of self._availability() for the collected flags
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # Privacy as reported by the first source that is present:
    # header privacy, then the dropdown icon, then (unlisted only) the microformat.
    # None means "unknown".
    if player_header_privacy is not None:
        reported_private = player_header_privacy == 'PRIVATE'
        reported_unlisted = player_header_privacy == 'UNLISTED'
    elif privacy_setting_icon is not None:
        reported_private = privacy_setting_icon == 'PRIVACY_PRIVATE'
        reported_unlisted = privacy_setting_icon == 'PRIVACY_UNLISTED'
    else:
        reported_private = None
        reported_unlisted = microformats_is_unlisted

    # A badge is authoritative regardless of which other source is present.
    # Previously `badge or x if header is not None else y` parsed as
    # `(badge or x) if header is not None else y`, so the badge was dropped
    # whenever the header privacy was missing.
    # `False or None` evaluates to None, keeping "unknown" distinguishable from False.
    return self._availability(
        is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or reported_private,
        is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or reported_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` value among the playlist sidebar items, else None."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without any request when data does not describe a playlist;
    otherwise returns the (non-fatal) browse API response.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={
            'params': 'wgYCCAA=',
            'browseId': f'VL{item_id}',
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the youtube:tab 'skip' extractor argument."""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and its initial data, retrying when appropriate.

    A retry is scheduled for network errors (except HTTP 403/429) and for
    pages whose initial data is incomplete. Other extraction errors and
    reported alerts end the attempts and are raised or downgraded to a
    warning according to fatal.

    Returns a (webpage, data) tuple; either element may be None.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            is_retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, HTTPError) or cause.status not in (403, 429))
            if is_retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            data = None
            continue

    return webpage, data
5475 def _report_playlist_authcheck(self, ytcfg, fatal=True):
5476 """Use if failed to extract ytcfg (and data) from initial webpage"""
5477 if not ytcfg and self.is_authenticated:
5478 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
5479 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
5480 raise ExtractorError(
5481 f'{msg}. If you are not downloading private content, or '
5482 'your cookies are only for the first account and channel,'
5483 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
5484 expected=True)
5485 self.report_warning(msg, only_once=True)
5487 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
5488 data = None
5489 if not self.skip_webpage:
5490 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
5491 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
5492 # Reject webpage data if redirected to home page without explicitly requesting
5493 selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
5494 if (url != 'https://www.youtube.com/feed/recommended'
5495 and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page
5496 and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
5497 msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
5498 if fatal:
5499 raise ExtractorError(msg, expected=True)
5500 self.report_warning(msg, only_once=True)
5501 if not data:
5502 self._report_playlist_authcheck(ytcfg, fatal=fatal)
5503 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
5504 return data, ytcfg
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve url through the API and fetch the data of the endpoint it maps to.

    Browse endpoints are queried through 'browse' and watch endpoints
    through 'next'. When the URL resolves to neither, ExtractorError is
    raised if fatal is set; otherwise a warning is reported and None is
    returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    for ep_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
    return None
# Default value of the search 'params' field, used when the caller passes none
_SEARCH_PARAMS = None


def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield search results for query, requesting further pages until no continuation is left.

    params overrides the class-level _SEARCH_PARAMS; a falsy value leaves
    the 'params' field out of the request altogether.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({keys[0] for keys in content_keys})

    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # From the second page on, the continuation is merged into the query
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            break
5560 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5561 IE_DESC = 'YouTube Tabs'
5562 _VALID_URL = r'''(?x:
5563 https?://
5564 (?!consent\.)(?:\w+\.)?
5566 youtube(?:kids)?\.com|
5567 {invidious}
5570 (?P<channel_type>channel|c|user|browse)/|
5571 (?P<not_channel>
5572 feed/|hashtag/|
5573 (?:playlist|watch)\?.*?\blist=
5575 (?!(?:{reserved_names})\b) # Direct URLs
5577 (?P<id>[^/?\#&]+)
5578 )'''.format(
5579 reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES,
5580 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5582 IE_NAME = 'youtube:tab'
5584 _TESTS = [{
5585 'note': 'playlists, multipage',
5586 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5587 'playlist_mincount': 94,
5588 'info_dict': {
5589 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5590 'title': 'Igor Kleiner Ph.D. - Playlists',
5591 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5592 'uploader': 'Igor Kleiner Ph.D.',
5593 'uploader_id': '@IgorDataScience',
5594 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5595 'channel': 'Igor Kleiner Ph.D.',
5596 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5597 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5598 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5599 'channel_follower_count': int,
5601 }, {
5602 'note': 'playlists, multipage, different order',
5603 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5604 'playlist_mincount': 94,
5605 'info_dict': {
5606 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5607 'title': 'Igor Kleiner Ph.D. - Playlists',
5608 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5609 'uploader': 'Igor Kleiner Ph.D.',
5610 'uploader_id': '@IgorDataScience',
5611 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5612 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5613 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5614 'channel': 'Igor Kleiner Ph.D.',
5615 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5616 'channel_follower_count': int,
5618 }, {
5619 'note': 'playlists, series',
5620 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5621 'playlist_mincount': 5,
5622 'info_dict': {
5623 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5624 'title': '3Blue1Brown - Playlists',
5625 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5626 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5627 'channel': '3Blue1Brown',
5628 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5629 'uploader_id': '@3blue1brown',
5630 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5631 'uploader': '3Blue1Brown',
5632 'tags': ['Mathematics'],
5633 'channel_follower_count': int,
5634 'channel_is_verified': True,
5636 }, {
5637 'note': 'playlists, singlepage',
5638 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5639 'playlist_mincount': 4,
5640 'info_dict': {
5641 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5642 'title': 'ThirstForScience - Playlists',
5643 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5644 'uploader': 'ThirstForScience',
5645 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5646 'uploader_id': '@ThirstForScience',
5647 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5648 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5649 'tags': 'count:12',
5650 'channel': 'ThirstForScience',
5651 'channel_follower_count': int,
5653 }, {
5654 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5655 'only_matching': True,
5656 }, {
5657 'note': 'basic, single video playlist',
5658 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5659 'info_dict': {
5660 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5661 'title': 'youtube-dl public playlist',
5662 'description': '',
5663 'tags': [],
5664 'view_count': int,
5665 'modified_date': '20201130',
5666 'channel': 'Sergey M.',
5667 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5668 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5669 'availability': 'public',
5670 'uploader': 'Sergey M.',
5671 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5672 'uploader_id': '@sergeym.6173',
5674 'playlist_count': 1,
5675 }, {
5676 'note': 'empty playlist',
5677 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5678 'info_dict': {
5679 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5680 'title': 'youtube-dl empty playlist',
5681 'tags': [],
5682 'channel': 'Sergey M.',
5683 'description': '',
5684 'modified_date': '20230921',
5685 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5686 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5687 'availability': 'unlisted',
5688 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5689 'uploader_id': '@sergeym.6173',
5690 'uploader': 'Sergey M.',
5692 'playlist_count': 0,
5693 }, {
5694 'note': 'Home tab',
5695 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5696 'info_dict': {
5697 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5698 'title': 'lex will - Home',
5699 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5700 'uploader': 'lex will',
5701 'uploader_id': '@lexwill718',
5702 'channel': 'lex will',
5703 'tags': ['bible', 'history', 'prophesy'],
5704 'uploader_url': 'https://www.youtube.com/@lexwill718',
5705 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5706 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5707 'channel_follower_count': int,
5709 'playlist_mincount': 2,
5710 }, {
5711 'note': 'Videos tab',
5712 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5713 'info_dict': {
5714 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5715 'title': 'lex will - Videos',
5716 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5717 'uploader': 'lex will',
5718 'uploader_id': '@lexwill718',
5719 'tags': ['bible', 'history', 'prophesy'],
5720 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5721 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5722 'uploader_url': 'https://www.youtube.com/@lexwill718',
5723 'channel': 'lex will',
5724 'channel_follower_count': int,
5726 'playlist_mincount': 975,
5727 }, {
5728 'note': 'Videos tab, sorted by popular',
5729 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5730 'info_dict': {
5731 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5732 'title': 'lex will - Videos',
5733 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5734 'uploader': 'lex will',
5735 'uploader_id': '@lexwill718',
5736 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5737 'uploader_url': 'https://www.youtube.com/@lexwill718',
5738 'channel': 'lex will',
5739 'tags': ['bible', 'history', 'prophesy'],
5740 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5741 'channel_follower_count': int,
5743 'playlist_mincount': 199,
5744 }, {
5745 'note': 'Playlists tab',
5746 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5747 'info_dict': {
5748 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5749 'title': 'lex will - Playlists',
5750 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5751 'uploader': 'lex will',
5752 'uploader_id': '@lexwill718',
5753 'uploader_url': 'https://www.youtube.com/@lexwill718',
5754 'channel': 'lex will',
5755 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5756 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5757 'tags': ['bible', 'history', 'prophesy'],
5758 'channel_follower_count': int,
5760 'playlist_mincount': 17,
5761 }, {
5762 'note': 'Community tab',
5763 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5764 'info_dict': {
5765 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5766 'title': 'lex will - Community',
5767 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5768 'channel': 'lex will',
5769 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5770 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5771 'tags': ['bible', 'history', 'prophesy'],
5772 'channel_follower_count': int,
5773 'uploader_url': 'https://www.youtube.com/@lexwill718',
5774 'uploader_id': '@lexwill718',
5775 'uploader': 'lex will',
5777 'playlist_mincount': 18,
5778 }, {
5779 'note': 'Channels tab',
5780 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5781 'info_dict': {
5782 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5783 'title': 'lex will - Channels',
5784 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5785 'channel': 'lex will',
5786 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5787 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5788 'tags': ['bible', 'history', 'prophesy'],
5789 'channel_follower_count': int,
5790 'uploader_url': 'https://www.youtube.com/@lexwill718',
5791 'uploader_id': '@lexwill718',
5792 'uploader': 'lex will',
5794 'playlist_mincount': 12,
5795 }, {
5796 'note': 'Search tab',
5797 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5798 'playlist_mincount': 40,
5799 'info_dict': {
5800 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5801 'title': '3Blue1Brown - Search - linear algebra',
5802 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5803 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5804 'tags': ['Mathematics'],
5805 'channel': '3Blue1Brown',
5806 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5807 'channel_follower_count': int,
5808 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5809 'uploader_id': '@3blue1brown',
5810 'uploader': '3Blue1Brown',
5811 'channel_is_verified': True,
5813 }, {
5814 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5815 'only_matching': True,
5816 }, {
5817 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5818 'only_matching': True,
5819 }, {
5820 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5821 'only_matching': True,
5822 }, {
5823 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5824 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5825 'info_dict': {
5826 'title': '29C3: Not my department',
5827 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5828 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5829 'tags': [],
5830 'view_count': int,
5831 'modified_date': '20150605',
5832 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5833 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
5834 'channel': 'Christiaan008',
5835 'availability': 'public',
5836 'uploader_id': '@ChRiStIaAn008',
5837 'uploader': 'Christiaan008',
5838 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
5840 'playlist_count': 96,
5841 }, {
5842 'note': 'Large playlist',
5843 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5844 'info_dict': {
5845 'title': 'Uploads from Cauchemar',
5846 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5847 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
5848 'tags': [],
5849 'modified_date': r're:\d{8}',
5850 'channel': 'Cauchemar',
5851 'view_count': int,
5852 'description': '',
5853 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5854 'availability': 'public',
5855 'uploader_id': '@Cauchemar89',
5856 'uploader': 'Cauchemar',
5857 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
5859 'playlist_mincount': 1123,
5860 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5861 }, {
5862 'note': 'even larger playlist, 8832 videos',
5863 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5864 'only_matching': True,
5865 }, {
5866 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5867 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5868 'info_dict': {
5869 'title': 'Uploads from Interstellar Movie',
5870 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5871 'tags': [],
5872 'view_count': int,
5873 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5874 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
5875 'channel': 'Interstellar Movie',
5876 'description': '',
5877 'modified_date': r're:\d{8}',
5878 'availability': 'public',
5879 'uploader_id': '@InterstellarMovie',
5880 'uploader': 'Interstellar Movie',
5881 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
5883 'playlist_mincount': 21,
5884 }, {
5885 'note': 'Playlist with "show unavailable videos" button',
5886 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5887 'info_dict': {
5888 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5889 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5890 'view_count': int,
5891 'channel': 'Phim Siêu Nhân Nhật Bản',
5892 'tags': [],
5893 'description': '',
5894 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5895 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5896 'modified_date': r're:\d{8}',
5897 'availability': 'public',
5898 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5899 'uploader_id': '@phimsieunhannhatban',
5900 'uploader': 'Phim Siêu Nhân Nhật Bản',
5902 'playlist_mincount': 200,
5903 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5904 }, {
5905 'note': 'Playlist with unavailable videos in page 7',
5906 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5907 'info_dict': {
5908 'title': 'Uploads from BlankTV',
5909 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5910 'channel': 'BlankTV',
5911 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
5912 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5913 'view_count': int,
5914 'tags': [],
5915 'modified_date': r're:\d{8}',
5916 'description': '',
5917 'availability': 'public',
5918 'uploader_id': '@blanktv',
5919 'uploader': 'BlankTV',
5920 'uploader_url': 'https://www.youtube.com/@blanktv',
5922 'playlist_mincount': 1000,
5923 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5924 }, {
5925 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5926 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5927 'info_dict': {
5928 'title': 'Data Analysis with Dr Mike Pound',
5929 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5930 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5931 'tags': [],
5932 'view_count': int,
5933 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5934 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
5935 'channel': 'Computerphile',
5936 'availability': 'public',
5937 'modified_date': '20190712',
5938 'uploader_id': '@Computerphile',
5939 'uploader': 'Computerphile',
5940 'uploader_url': 'https://www.youtube.com/@Computerphile',
5942 'playlist_mincount': 11,
5943 }, {
5944 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5945 'only_matching': True,
5946 }, {
5947 'note': 'Playlist URL that does not actually serve a playlist',
5948 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5949 'info_dict': {
5950 'id': 'FqZTN594JQw',
5951 'ext': 'webm',
5952 'title': "Smiley's People 01 detective, Adventure Series, Action",
5953 'upload_date': '20150526',
5954 'license': 'Standard YouTube License',
5955 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5956 'categories': ['People & Blogs'],
5957 'tags': list,
5958 'view_count': int,
5959 'like_count': int,
5961 'params': {
5962 'skip_download': True,
5964 'skip': 'This video is not available.',
5965 'add_ie': [YoutubeIE.ie_key()],
5966 }, {
5967 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5968 'only_matching': True,
5969 }, {
5970 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5971 'only_matching': True,
5972 }, {
5973 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5974 'info_dict': {
5975 'id': 'hGkQjiJLjWQ', # This will keep changing
5976 'ext': 'mp4',
5977 'title': str,
5978 'upload_date': r're:\d{8}',
5979 'description': str,
5980 'categories': ['News & Politics'],
5981 'tags': list,
5982 'like_count': int,
5983 'release_timestamp': int,
5984 'channel': 'Sky News',
5985 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5986 'age_limit': 0,
5987 'view_count': int,
5988 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
5989 'playable_in_embed': True,
5990 'release_date': r're:\d+',
5991 'availability': 'public',
5992 'live_status': 'is_live',
5993 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5994 'channel_follower_count': int,
5995 'concurrent_view_count': int,
5996 'uploader_url': 'https://www.youtube.com/@SkyNews',
5997 'uploader_id': '@SkyNews',
5998 'uploader': 'Sky News',
5999 'channel_is_verified': True,
6001 'params': {
6002 'skip_download': True,
6004 'expected_warnings': ['Ignoring subtitle tracks found in '],
6005 }, {
6006 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
6007 'info_dict': {
6008 'id': 'a48o2S1cPoo',
6009 'ext': 'mp4',
6010 'title': 'The Young Turks - Live Main Show',
6011 'upload_date': '20150715',
6012 'license': 'Standard YouTube License',
6013 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
6014 'categories': ['News & Politics'],
6015 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
6016 'like_count': int,
6018 'params': {
6019 'skip_download': True,
6021 'only_matching': True,
6022 }, {
6023 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
6024 'only_matching': True,
6025 }, {
6026 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
6027 'only_matching': True,
6028 }, {
6029 'note': 'A channel that is not live. Should raise error',
6030 'url': 'https://www.youtube.com/user/numberphile/live',
6031 'only_matching': True,
6032 }, {
6033 'url': 'https://www.youtube.com/feed/trending',
6034 'only_matching': True,
6035 }, {
6036 'url': 'https://www.youtube.com/feed/library',
6037 'only_matching': True,
6038 }, {
6039 'url': 'https://www.youtube.com/feed/history',
6040 'only_matching': True,
6041 }, {
6042 'url': 'https://www.youtube.com/feed/subscriptions',
6043 'only_matching': True,
6044 }, {
6045 'url': 'https://www.youtube.com/feed/watch_later',
6046 'only_matching': True,
6047 }, {
6048 'note': 'Recommended - redirects to home page.',
6049 'url': 'https://www.youtube.com/feed/recommended',
6050 'only_matching': True,
6051 }, {
6052 'note': 'inline playlist with not always working continuations',
6053 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
6054 'only_matching': True,
6055 }, {
6056 'url': 'https://www.youtube.com/course',
6057 'only_matching': True,
6058 }, {
6059 'url': 'https://www.youtube.com/zsecurity',
6060 'only_matching': True,
6061 }, {
6062 'url': 'http://www.youtube.com/NASAgovVideo/videos',
6063 'only_matching': True,
6064 }, {
6065 'url': 'https://www.youtube.com/TheYoungTurks/live',
6066 'only_matching': True,
6067 }, {
6068 'url': 'https://www.youtube.com/hashtag/cctv9',
6069 'info_dict': {
6070 'id': 'cctv9',
6071 'title': 'cctv9 - All',
6072 'tags': [],
6074 'playlist_mincount': 300, # not consistent but should be over 300
6075 }, {
6076 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
6077 'only_matching': True,
6078 }, {
6079 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
6080 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6081 'only_matching': True,
6082 }, {
6083 'note': '/browse/ should redirect to /channel/',
6084 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
6085 'only_matching': True,
6086 }, {
6087 'note': 'VLPL, should redirect to playlist?list=PL...',
6088 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6089 'info_dict': {
6090 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6091 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
6092 'title': 'NCS : All Releases 💿',
6093 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
6094 'modified_date': r're:\d{8}',
6095 'view_count': int,
6096 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6097 'tags': [],
6098 'channel': 'NoCopyrightSounds',
6099 'availability': 'public',
6100 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6101 'uploader': 'NoCopyrightSounds',
6102 'uploader_id': '@NoCopyrightSounds',
6104 'playlist_mincount': 166,
6105 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
6106 }, {
6107 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
6108 'note': 'Topic, should redirect to playlist?list=UU...',
6109 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6110 'info_dict': {
6111 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6112 'title': 'Uploads from Royalty Free Music - Topic',
6113 'tags': [],
6114 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6115 'channel': 'Royalty Free Music - Topic',
6116 'view_count': int,
6117 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6118 'modified_date': r're:\d{8}',
6119 'description': '',
6120 'availability': 'public',
6121 'uploader': 'Royalty Free Music - Topic',
6123 'playlist_mincount': 101,
6124 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6125 }, {
6126 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6127 # Treat as a general feed
6128 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6129 'info_dict': {
6130 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6131 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
6132 'tags': [],
6134 'playlist_mincount': 9,
6135 }, {
6136 'note': 'Youtube music Album',
6137 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6138 'info_dict': {
6139 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6140 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
6141 'tags': [],
6142 'view_count': int,
6143 'description': '',
6144 'availability': 'unlisted',
6145 'modified_date': r're:\d{8}',
6147 'playlist_count': 50,
6148 'expected_warnings': ['YouTube Music is not directly supported'],
6149 }, {
6150 'note': 'unlisted single video playlist',
6151 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6152 'info_dict': {
6153 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6154 'title': 'yt-dlp unlisted playlist test',
6155 'availability': 'unlisted',
6156 'tags': [],
6157 'modified_date': '20220418',
6158 'channel': 'colethedj',
6159 'view_count': int,
6160 'description': '',
6161 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6162 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
6163 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6164 'uploader_id': '@colethedj1894',
6165 'uploader': 'colethedj',
6167 'playlist': [{
6168 'info_dict': {
6169 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6170 'id': 'BaW_jenozKc',
6171 '_type': 'url',
6172 'ie_key': 'Youtube',
6173 'duration': 10,
6174 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6175 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6176 'view_count': int,
6177 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6178 'channel': 'Philipp Hagemeister',
6179 'uploader_id': '@PhilippHagemeister',
6180 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6181 'uploader': 'Philipp Hagemeister',
6184 'playlist_count': 1,
6185 'params': {'extract_flat': True},
6186 }, {
6187 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6188 'url': 'https://www.youtube.com/feed/recommended',
6189 'info_dict': {
6190 'id': 'recommended',
6191 'title': 'recommended',
6192 'tags': [],
6194 'playlist_mincount': 50,
6195 'params': {
6196 'skip_download': True,
6197 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6199 }, {
6200 'note': 'API Fallback: /videos tab, sorted by oldest first',
6201 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6202 'info_dict': {
6203 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6204 'title': 'Cody\'sLab - Videos',
6205 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6206 'channel': 'Cody\'sLab',
6207 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6208 'tags': [],
6209 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6210 'channel_follower_count': int,
6212 'playlist_mincount': 650,
6213 'params': {
6214 'skip_download': True,
6215 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6217 'skip': 'Query for sorting no longer works',
6218 }, {
6219 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6220 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6221 'info_dict': {
6222 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6223 'title': 'Uploads from Royalty Free Music - Topic',
6224 'modified_date': r're:\d{8}',
6225 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6226 'description': '',
6227 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6228 'tags': [],
6229 'channel': 'Royalty Free Music - Topic',
6230 'view_count': int,
6231 'availability': 'public',
6232 'uploader': 'Royalty Free Music - Topic',
6234 'playlist_mincount': 101,
6235 'params': {
6236 'skip_download': True,
6237 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6239 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6240 }, {
6241 'note': 'non-standard redirect to regional channel',
6242 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6243 'only_matching': True,
6244 }, {
6245 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6246 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6247 'info_dict': {
6248 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6249 'modified_date': '20220407',
6250 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6251 'tags': [],
6252 'availability': 'unlisted',
6253 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6254 'channel': 'pukkandan',
6255 'description': 'Test for collaborative playlist',
6256 'title': 'yt-dlp test - collaborative playlist',
6257 'view_count': int,
6258 'uploader_url': 'https://www.youtube.com/@pukkandan',
6259 'uploader_id': '@pukkandan',
6260 'uploader': 'pukkandan',
6262 'playlist_mincount': 2,
6263 }, {
6264 'note': 'translated tab name',
6265 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6266 'info_dict': {
6267 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6268 'tags': [],
6269 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6270 'description': 'test description',
6271 'title': 'cole-dlp-test-acc - 再生リスト',
6272 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6273 'channel': 'cole-dlp-test-acc',
6274 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6275 'uploader_id': '@coletdjnz',
6276 'uploader': 'cole-dlp-test-acc',
6278 'playlist_mincount': 1,
6279 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6280 'expected_warnings': ['Preferring "ja"'],
6281 }, {
6282 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6283 'note': 'preferred lang set with playlist with translated video titles',
6284 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6285 'info_dict': {
6286 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6287 'tags': [],
6288 'view_count': int,
6289 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6290 'channel': 'cole-dlp-test-acc',
6291 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6292 'description': 'test',
6293 'title': 'dlp test playlist',
6294 'availability': 'public',
6295 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6296 'uploader_id': '@coletdjnz',
6297 'uploader': 'cole-dlp-test-acc',
6299 'playlist_mincount': 1,
6300 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6301 'expected_warnings': ['Preferring "ja"'],
6302 }, {
6303 # shorts audio pivot for 2GtVksBMYFM.
6304 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6305 'info_dict': {
6306 'id': 'sfv_audio_pivot',
6307 'title': 'sfv_audio_pivot',
6308 'tags': [],
6310 'playlist_mincount': 50,
6312 }, {
6313 # Channel with a real live tab (not to be mistaken for the streams tab)
6314 # Do not treat like it should redirect to live stream
6315 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6316 'info_dict': {
6317 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6318 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6319 'tags': [],
6321 'playlist_mincount': 20,
6322 }, {
6323 # Tab name is not the same as tab id
6324 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6325 'info_dict': {
6326 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6327 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6328 'tags': [],
6330 'playlist_mincount': 8,
6331 }, {
6332 # Home tab id is literally home. Not to be mistaken for featured
6333 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6334 'info_dict': {
6335 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6336 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6337 'tags': [],
6339 'playlist_mincount': 8,
6340 }, {
6341 # Should get three playlists for videos, shorts and streams tabs
6342 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6343 'info_dict': {
6344 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6345 'title': 'Polka Ch. 尾丸ポルカ',
6346 'channel_follower_count': int,
6347 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6348 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6349 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
6350 'channel': 'Polka Ch. 尾丸ポルカ',
6351 'tags': 'count:35',
6352 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6353 'uploader': 'Polka Ch. 尾丸ポルカ',
6354 'uploader_id': '@OmaruPolka',
6355 'channel_is_verified': True,
6357 'playlist_count': 3,
6358 }, {
6359 # Shorts tab with channel with handle
6360 # TODO: fix channel description
6361 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6362 'info_dict': {
6363 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6364 'title': 'Not Just Bikes - Shorts',
6365 'tags': 'count:10',
6366 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6367 'description': 'md5:5e82545b3a041345927a92d0585df247',
6368 'channel_follower_count': int,
6369 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6370 'channel': 'Not Just Bikes',
6371 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6372 'uploader': 'Not Just Bikes',
6373 'uploader_id': '@NotJustBikes',
6374 'channel_is_verified': True,
6376 'playlist_mincount': 10,
6377 }, {
6378 # Streams tab
6379 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6380 'info_dict': {
6381 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6382 'title': '中村悠一 - Live',
6383 'tags': 'count:7',
6384 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6385 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6386 'channel': '中村悠一',
6387 'channel_follower_count': int,
6388 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6389 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6390 'uploader_id': '@Yuichi-Nakamura',
6391 'uploader': '中村悠一',
6393 'playlist_mincount': 60,
6394 }, {
6395 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6396 # See test_youtube_lists
6397 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6398 'only_matching': True,
6399 }, {
6400 # No uploads and no UCID given. Should fail with no uploads error
6401 # See test_youtube_lists
6402 'url': 'https://www.youtube.com/news',
6403 'only_matching': True,
6404 }, {
6405 # No videos tab but has a shorts tab
6406 'url': 'https://www.youtube.com/c/TKFShorts',
6407 'info_dict': {
6408 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6409 'title': 'Shorts Break - Shorts',
6410 'tags': 'count:48',
6411 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6412 'channel': 'Shorts Break',
6413 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6414 'channel_follower_count': int,
6415 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6416 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6417 'uploader': 'Shorts Break',
6418 'uploader_id': '@ShortsBreak_Official',
6420 'playlist_mincount': 30,
6421 }, {
6422 # Trending Now Tab. tab id is empty
6423 'url': 'https://www.youtube.com/feed/trending',
6424 'info_dict': {
6425 'id': 'trending',
6426 'title': 'trending - Now',
6427 'tags': [],
6429 'playlist_mincount': 30,
6430 }, {
6431 # Trending Gaming Tab. tab id is empty
6432 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6433 'info_dict': {
6434 'id': 'trending',
6435 'title': 'trending - Gaming',
6436 'tags': [],
6438 'playlist_mincount': 30,
6439 }, {
6440 # Shorts url result in shorts tab
6441 # TODO: Fix channel id extraction
6442 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6443 'info_dict': {
6444 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6445 'title': 'cole-dlp-test-acc - Shorts',
6446 'channel': 'cole-dlp-test-acc',
6447 'description': 'test description',
6448 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6449 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6450 'tags': [],
6451 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6452 'uploader_id': '@coletdjnz',
6453 'uploader': 'cole-dlp-test-acc',
6455 'playlist': [{
6456 'info_dict': {
6457 # Channel data is not currently available for short renderers (as of 2023-03-01)
6458 '_type': 'url',
6459 'ie_key': 'Youtube',
6460 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6461 'id': 'sSM9J5YH_60',
6462 'title': 'SHORT short',
6463 'view_count': int,
6464 'thumbnails': list,
6467 'params': {'extract_flat': True},
6468 }, {
6469 # Live video status should be extracted
6470 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6471 'info_dict': {
6472 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6473 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6474 'tags': [],
6476 'playlist': [{
6477 'info_dict': {
6478 '_type': 'url',
6479 'ie_key': 'Youtube',
6480 'url': 'startswith:https://www.youtube.com/watch?v=',
6481 'id': str,
6482 'title': str,
6483 'live_status': 'is_live',
6484 'channel_id': str,
6485 'channel_url': str,
6486 'concurrent_view_count': int,
6487 'channel': str,
6488 'uploader': str,
6489 'uploader_url': str,
6490 'uploader_id': str,
6491 'channel_is_verified': bool, # this will keep changing
6494 'params': {'extract_flat': True, 'playlist_items': '1'},
6495 'playlist_mincount': 1,
6496 }, {
6497 # Channel renderer metadata. Contains number of videos on the channel
6498 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6499 'info_dict': {
6500 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6501 'title': 'cole-dlp-test-acc - Channels',
6502 'channel': 'cole-dlp-test-acc',
6503 'description': 'test description',
6504 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6505 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6506 'tags': [],
6507 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6508 'uploader_id': '@coletdjnz',
6509 'uploader': 'cole-dlp-test-acc',
6511 'playlist': [{
6512 'info_dict': {
6513 '_type': 'url',
6514 'ie_key': 'YoutubeTab',
6515 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6516 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6517 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6518 'title': 'PewDiePie',
6519 'channel': 'PewDiePie',
6520 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6521 'thumbnails': list,
6522 'channel_follower_count': int,
6523 'playlist_count': int,
6524 'uploader': 'PewDiePie',
6525 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6526 'uploader_id': '@PewDiePie',
6527 'channel_is_verified': True,
6530 'params': {'extract_flat': True},
6531 }, {
6532 'url': 'https://www.youtube.com/@3blue1brown/about',
6533 'info_dict': {
6534 'id': '@3blue1brown',
6535 'tags': ['Mathematics'],
6536 'title': '3Blue1Brown',
6537 'channel_follower_count': int,
6538 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6539 'channel': '3Blue1Brown',
6540 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6541 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6542 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6543 'uploader_id': '@3blue1brown',
6544 'uploader': '3Blue1Brown',
6545 'channel_is_verified': True,
6547 'playlist_count': 0,
6548 }, {
6549 # Podcasts tab, with rich entry playlistRenderers
6550 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6551 'info_dict': {
6552 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6553 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6554 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6555 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6556 'title': '99 Percent Invisible - Podcasts',
6557 'uploader': '99 Percent Invisible',
6558 'channel_follower_count': int,
6559 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6560 'tags': [],
6561 'channel': '99 Percent Invisible',
6562 'uploader_id': '@99percentinvisiblepodcast',
6564 'playlist_count': 0,
6565 }, {
6566 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6567 'url': 'https://www.youtube.com/@AHimitsu/releases',
6568 'info_dict': {
6569 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6570 'channel': 'A Himitsu',
6571 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6572 'title': 'A Himitsu - Releases',
6573 'uploader_id': '@AHimitsu',
6574 'uploader': 'A Himitsu',
6575 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6576 'tags': 'count:12',
6577 'description': 'I make music',
6578 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6579 'channel_follower_count': int,
6580 'channel_is_verified': True,
6582 'playlist_mincount': 10,
6583 }, {
6584 # Playlist with only shorts, shown as reel renderers
6585 # FIXME: future: YouTube currently doesn't give continuation for this,
6586 # may do in future.
6587 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6588 'info_dict': {
6589 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6590 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6591 'view_count': int,
6592 'uploader_id': '@BangyShorts',
6593 'description': '',
6594 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6595 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6596 'channel': 'Bangy Shorts',
6597 'uploader': 'Bangy Shorts',
6598 'tags': [],
6599 'availability': 'public',
6600 'modified_date': r're:\d{8}',
6601 'title': 'Uploads from Bangy Shorts',
6603 'playlist_mincount': 100,
6604 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6605 }, {
6606 'note': 'Tags containing spaces',
6607 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6608 'playlist_count': 3,
6609 'info_dict': {
6610 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6611 'channel': 'Markiplier',
6612 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6613 'title': 'Markiplier',
6614 'channel_follower_count': int,
6615 'description': 'md5:0c010910558658824402809750dc5d97',
6616 'uploader_id': '@markiplier',
6617 'uploader_url': 'https://www.youtube.com/@markiplier',
6618 'uploader': 'Markiplier',
6619 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6620 'channel_is_verified': True,
6621 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6622 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6623 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6624 'mark fischbach'],
@classmethod
def suitable(cls, url):
    """Decline every URL that YoutubeIE accepts; otherwise defer to the parent class check"""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
# Splits a URL into three named parts:
#   'pre'  - whatever _VALID_URL matches
#   'tab'  - an optional path segment ('/' followed by characters other than '?', '#', '/');
#            it is only attempted when the 'not_channel' group did not take part in the match
#   'post' - the rest of the URL
# NOTE(review): 'not_channel' is not defined in this pattern itself, so it is presumably
# a named group of _VALID_URL - confirm
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6634 def _get_url_mobj(self, url):
6635 mobj = self._URL_RE.match(url).groupdict()
6636 mobj.update((k, '') for k, v in mobj.items() if v is None)
6637 return mobj
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """
    Work out the identifier and the lower-cased title of a tab renderer

    The identifier is taken, in order of preference, from the tab segment of the
    tab's endpoint URL (resolved against `base_url`), from the 'tabIdentifier'
    field, and finally from the tab title itself.

    @returns  A tuple (tab_id, tab_name)
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_path = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_path)

    tab_id = self._get_url_mobj(tab_url)['tab'][1:] if tab_url else None
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)
    if tab_id:
        if tab_id == 'TAB_ID_SPONSORSHIPS':
            tab_id = 'membership'
        return tab_id, tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_to_id = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_to_id.get(tab_name, tab_name), tab_name
6661 def _has_tab(self, tabs, tab_id):
6662 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
6664 def _empty_playlist(self, item_id, data):
6665 return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """
    Extract a channel tab, playlist or feed page

    Depending on the URL and the downloaded data this either returns
     - a `url_result` that hands over to another URL (YouTube Music VL/MP/browse URLs,
       watch URLs without a video id, a single video, conditional redirects),
     - the result built from the page's tab renderers (plus the streams/shorts tabs
       when a channel URL without an explicit tab was given),
     - the result built from an inline playlist of a watch page.

    Raises ExtractorError when the page cannot be recognized or the requested tab
    does not exist, and UserNotLive when a /live URL did not resolve to a video.
    """
    item_id = self._match_id(url)
    # Force the host part of the URL to www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # The URL counts as a channel URL whenever the 'not_channel' group is empty
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` includes its leading slash; `original_tab_id` is the bare tab name ('' if none was given)
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Channel URL without a tab: request the /videos tab
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # Hand over to the video extractor when `_yes_playlist` decides against the playlist
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and the trailing part of the URL on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects the URLs of further tabs to extract in addition to the selected one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # `data` still holds the channel page here, as the assignment above did not happen
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # NOTE(review): `data` is reset to None here, presumably so that only
                # the extra tabs contribute entries - confirm
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Tabs are extracted again since `data` may have been replaced above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        # Recurse into this method for every extra tab URL
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to the single video referenced by the page or the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
class YoutubePlaylistIE(InfoExtractor):
    # Matches bare playlist ids as well as any youtube/invidious URL carrying a `list=` parameter,
    # and hands the playlist over to YoutubeTabIE
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                {invidious}
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>{playlist_id})
                     )'''.format(
        playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    )
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept `url` only if YoutubeTabIE does not and it carries no `v` (video id) parameter"""
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is the module-level import from ..utils; no local re-import is needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite `url` to a www.youtube.com/playlist URL and delegate it to YoutubeTabIE"""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare id has none, so build `list=<id>`
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            # The host is rewritten above, so flag the music origin via smuggled data
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that carry both a video id and a `list=` playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into the equivalent watch URL and delegate it to YoutubeTabIE"""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # Maps `embed/live_stream?channel=<id>` URLs onto the channel's /live URL
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the embedded channel"""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
class YoutubeYtUserIE(InfoExtractor):
    # Resolves the "ytuser:<name>" shorthand to the /user/<name> URL
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/ URL of the given user name"""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Resolves the ":ytfav" keyword (and its spelling variants) to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of the "LL" playlist"""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    # Lists the entries of the notification menu as a playlist of url results
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """
        Yield the entries of one notification menu response

        `continuation_list[0]` is reset to None and then set to the last
        'continuationItemRenderer' found among the items, if there is any.
        """
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        continuation_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(
            response, popup_items_path, continuation_items_path, expected_type=list) or []
        continuation_list[0] = None
        for item in items:
            renderer = item.get('notificationRenderer')
            entry = self._extract_notification_renderer(renderer)
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """
        Convert a notification renderer into a url result dict

        Notifications with a video id point to the video; otherwise a channel id
        and a post id are required and the result points to the community post.
        Returns None if neither kind of target can be determined.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_pattern, notification_title, 'video title', default=None)
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))
        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield the entries of every notification menu page, following continuations"""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # The visitor data is taken from the previous response (None for the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist with 'notifications' as both id and title"""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Search extractor for the 'ytsearch' key; this class only supplies attributes.
    # NOTE(review): the search logic presumably lives in the two base classes - confirm
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Variant of the search extractor for the 'ytsearchdate' key; its name is derived
    # from YoutubeSearchIE.IE_NAME and its search params request date ordering
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles /results and /search URLs; the query comes from `search_query` or `q`
    # and the optional `sp` parameter is passed on to the search
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by the URL and return the results as a playlist named after the query"""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com search URLs; a section can be selected either through
    # the `sp` parameter or through the URL fragment (see _SECTIONS)
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> search params
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """
        Run the music search described by the URL and return the results as a playlist

        The playlist id and title are the query, followed by ' - <section>' when a
        section was selected.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Name the section after the matching _SECTIONS entry, or after the raw params
            section = params
            for name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = name
                    break
        else:
            # Without `sp`, the section may be given as the URL fragment
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the ":yt..." feed keyword extractors.
    Each subclass is expected to override _FEED_NAME with its own feed.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Delegates to the shared YouTube base-class login check, passing this
        # extractor instance in explicitly.
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        # The extractor name is derived from the feed name: "youtube:<feed>"
        feed = cls._FEED_NAME
        return f'youtube:{feed}'

    def _real_extract(self, url):
        # Redirect to the matching /feed/<name> page, handled by YoutubeTabIE
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor that maps ":ytwatchlater" onto the "WL" playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{'url': ':ytwatchlater', 'only_matching': True}]

    def _real_extract(self, url):
        # The input keyword carries no data; always hand off the fixed
        # playlist URL to YoutubeTabIE.
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed.

    Matches the bare youtube.com home page URL as well as the ":ytrec" and
    ":ytrecommended" keywords.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base-class default of True
    _LOGIN_REQUIRED = False
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed.

    The keyword pattern accepts ":ytsub", ":ytsubs", ":ytsubscription" and
    ":ytsubscriptions".
    """
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory" keywords)."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Redirects /source/<video id>/shorts URLs to the sfv_audio_pivot feed."""
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the ``bp`` browse-params value of the sfv_audio_pivot feed for a video id.

        The id is embedded three times in a fixed binary template (presumably a
        serialized protobuf message, going by the ``pb_`` naming - unconfirmed),
        which is then base64-encoded and percent-quoted.
        """
        raw_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        browse_params = self._generate_audio_pivot_params(self._match_id(url))
        pivot_url = f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}'
        return self.url_result(pivot_url, ie=YoutubeTabIE)
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs that lost their ``v=`` parameter and explains why.

    This typically happens when an unquoted URL is cut at ``&`` by the shell.
    The extractor never extracts anything; it only raises a helpful error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing the likely cause."""
        # The example commands name this program (yt-dlp) rather than its
        # predecessor, so users can copy them as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for youtube.com/clip/<id> URLs.

    Resolves the clip to its base video and hands it to YoutubeIE as a
    ``url_transparent`` result carrying the clip's start/end offsets.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the clip's base video.

        ``section_start``/``section_end`` are the clip bounds in seconds,
        converted from the millisecond values of the page's ``loopCommand``.

        Raises ExtractorError if the base video id or the clip bounds cannot
        be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First ``loopCommand`` found under the clip engagement panel; it is
        # read below for the clip's start/end times in milliseconds.
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard a missing panel surfaces as an opaque
            # TypeError from subscripting None below.
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Follows consent.youtube.com redirect pages to their ``continue`` target."""
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Hand off to whatever URL the ``continue`` query parameter points at.

        When the parameter is repeated, the last occurrence is used.
        Raises an expected ExtractorError if it is missing or not a valid URL.
        """
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose ``v=`` id is only 1-10 characters long.

    Such an id is incomplete, so the extractor only reports the problem.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True}]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id."""
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)