import collections
import contextlib
import copy
import datetime as dt
import errno
import fileinput
import functools
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import urllib  # isort: split
from .compat import compat_os_name, urllib_req_to_req
from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    network_exceptions,
)
from .networking.impersonate import ImpersonateRequestHandler
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import (
    REPOSITORY,
    _get_system_deprecation,
    _make_label,
    current_git_head,
    detect_variant,
)
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLError,
    age_restricted,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filesize_from_tbr,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    shell_quote,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _UnsafeExtensionError, _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


def _catch_unsafe_extension_error(func):
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except _UnsafeExtensionError as error:
            self.report_error(
                f'The extracted extension ({error.extension!r}) is unusual '
                'and will be skipped for safety reasons. '
                f'If you believe this is an error{bug_reports_message(",")}')

    return wrapper


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".
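
    A minimal usage sketch (the options and URL here are illustrative,
    not defaults):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])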

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
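                       E.g. (illustrative): forceprint={'video': ['%(title)s']}
                       prints each video's title once it has been resolved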
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
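                       A minimal sketch (values illustrative; FFmpegExtractAudio
                       is one of the available keys):

                           postprocessors=[{'key': 'FFmpegExtractAudio',
                                            'preferredcodec': 'mp3'}]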
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
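
                       A minimal hook sketch (the hook name is illustrative):

                           def my_hook(d):
                               if d['status'] == 'finished':
                                   print('Downloaded', d['filename'])

                           # pass as: YoutubeDL({'progress_hooks': [my_hook]})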
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    impersonate:       Client to impersonate for requests.
                       An ImpersonateTarget (from yt_dlp.networking.impersonate)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils/_utils.py is one example for this.
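
                       A minimal sketch of such a filter (name illustrative):

                           def skip_short(info_dict, *, incomplete):
                               duration = info_dict.get('duration')
                               if duration is not None and duration < 60:
                                   return 'Skipping: shorter than a minute'
                               return None  # download the video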
    color:             A Dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto',
                       'no_color', 'never', 'auto-tty' or 'no_color-tty'.
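                       E.g. (illustrative): {'stdout': 'auto', 'stderr': 'no_color'}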
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat,
                       format-sort, no-clean-infojson, no-playlist-metafiles,
                       no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
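                       E.g. (illustrative): {'http': lambda n: 2 ** n} backs off
                       exponentially with the attempt number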
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
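
                       A minimal sketch (times illustrative):

                           def first_minute(info_dict, ydl):
                               return [{'start_time': 0, 'end_time': 60}]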
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads, progress_delta.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true if we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    no_overwrites:     Same as `overwrites=False`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
        'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
        'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
    }

    _deprecated_multivalue_fields = {
        'album_artist': 'album_artists',
        'artist': 'artists',
        'composer': 'composers',
        'creator': 'creators',
        'genre': 'genres',
    }

    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)
        self.__header_cookies = []

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
        base_no_color = bool(os.getenv('NO_COLOR'))

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}, any)) or 'auto'
            if policy in ('auto', 'auto-tty', 'no_color-tty'):
                no_color = base_no_color
                if policy.endswith('tty'):
                    no_color = policy.startswith('no_color')
                if term_allow_color and supports_terminal_sequences(stream):
                    return 'no_color' if no_color else True
                return False
            assert policy in ('always', 'never', 'no_color'), policy
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        system_deprecation = _get_system_deprecation()
        if system_deprecation:
            self.deprecated_feature(system_deprecation.replace('\n', '\n '))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if impersonate_target := self.params.get('impersonate'):
            if not self._impersonate_target_available(impersonate_target):
                raise YoutubeDLError(
                    f'Impersonate target "{impersonate_target}" is not available. '
                    f'Use --list-impersonate-targets to see available targets. '
                    f'You may be missing dependencies required to support this target.')

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        if '_request_director' in self.__dict__:
            self._request_director.close()
            del self._request_director

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        """Log debug message or Print message to stderr"""
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(f'[download] {file_name} has already been downloaded')
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen(f'Deleting existing file {file_name}')
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        """ Escape any remaining strings like %s, %abc% etc. """
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        """ @return None or Exception object """
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
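
    # A sketch of typical use (illustrative; mirrors how callers can reject
    # a bad template up front, since the method returns rather than raises):
    #     err = YoutubeDL.validate_outtmpl('%(title)s.%(ext)s')
    #     if err:
    #         raise err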

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
            '*': float.__mul__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}  # noqa: UP031
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {  # noqa: UP031
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _from_user_input(field):
            if field == ':':
                return ...
            elif ':' in field:
                return slice(*map(int_or_none, field.split(':')))
            elif int_or_none(field) is not None:
                return int(field)
            return field

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    fields[i] = _from_user_input(f)
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            fmt = outer_mobj.group('format')
            if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = shell_quote(value, shell=True), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1417 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1418 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1419 return self.escape_outtmpl(outtmpl) % info_dict
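# A minimal illustration of the template machinery above, assuming `ydl` is a
# YoutubeDL instance; the field values are hypothetical, not real extractor output:
#   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
#                        {'title': 'Demo', 'id': 'abc123', 'ext': 'mp4'})
#   # -> 'Demo [abc123].mp4'
#   ydl.evaluate_outtmpl('%(tags)#l', {'tags': ['a', 'b']})  # '#' joins the list with newlines
#   ydl.evaluate_outtmpl('%(id)j', {'id': 'abc123'})         # JSON-encodes the value -> '"abc123"'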
1421 @_catch_unsafe_extension_error
1422 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1423 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1424 if outtmpl is None:
1425 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1426 try:
1427 outtmpl = self._outtmpl_expandpath(outtmpl)
1428 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1429 if not filename:
1430 return None
1432 if tmpl_type in ('', 'temp'):
1433 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1434 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1435 filename = replace_extension(filename, ext, final_ext)
1436 elif tmpl_type:
1437 force_ext = OUTTMPL_TYPES[tmpl_type]
1438 if force_ext:
1439 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1441 # https://github.com/blackjack4494/youtube-dlc/issues/85
1442 trim_file_name = self.params.get('trim_file_name', False)
1443 if trim_file_name:
1444 no_ext, *ext = filename.rsplit('.', 2)
1445 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1447 return filename
1448 except ValueError as err:
1449 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1450 return None
1452 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1453 """Generate the output filename"""
1454 if outtmpl:
1455 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1456 dir_type = None
1457 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1458 if not filename and dir_type not in ('', 'temp'):
1459 return ''
1461 if warn:
1462 if not self.params.get('paths'):
1463 pass
1464 elif filename == '-':
1465 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1466 elif os.path.isabs(filename):
1467 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1468 if filename == '-' or not filename:
1469 return filename
1471 return self.get_output_path(dir_type, filename)
1473 def _match_entry(self, info_dict, incomplete=False, silent=False):
1474 """Returns None if the file should be downloaded"""
1475 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1476 assert incomplete or _type == 'video', 'Only a video result can be considered complete'
1478 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1480 def check_filter():
1481 if _type in ('playlist', 'multi_video'):
1482 return
1483 elif _type in ('url', 'url_transparent') and not try_call(
1484 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1485 return
1487 if 'title' in info_dict:
1488 # This can happen when we're just evaluating the playlist
1489 title = info_dict['title']
1490 matchtitle = self.params.get('matchtitle', False)
1491 if matchtitle:
1492 if not re.search(matchtitle, title, re.IGNORECASE):
1493 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1494 rejecttitle = self.params.get('rejecttitle', False)
1495 if rejecttitle:
1496 if re.search(rejecttitle, title, re.IGNORECASE):
1497 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1499 date = info_dict.get('upload_date')
1500 if date is not None:
1501 date_range = self.params.get('daterange', DateRange())
1502 if date not in date_range:
1503 return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
1504 view_count = info_dict.get('view_count')
1505 if view_count is not None:
1506 min_views = self.params.get('min_views')
1507 if min_views is not None and view_count < min_views:
1508 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1509 max_views = self.params.get('max_views')
1510 if max_views is not None and view_count > max_views:
1511 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1512 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1513 return f'Skipping "{video_title}" because it is age restricted'
1515 match_filter = self.params.get('match_filter')
1516 if match_filter is None:
1517 return None
1519 cancelled = None
1520 try:
1521 try:
1522 ret = match_filter(info_dict, incomplete=incomplete)
1523 except TypeError:
1524 # For backward compatibility
1525 ret = None if incomplete else match_filter(info_dict)
1526 except DownloadCancelled as err:
1527 if err.msg is not NO_DEFAULT:
1528 raise
1529 ret, cancelled = err.msg, err
1531 if ret is NO_DEFAULT:
1532 while True:
1533 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1534 reply = input(self._format_screen(
1535 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1536 if reply in {'y', ''}:
1537 return None
1538 elif reply == 'n':
1539 if cancelled:
1540 raise type(cancelled)(f'Skipping {video_title}')
1541 return f'Skipping {video_title}'
1542 return ret
1544 if self.in_download_archive(info_dict):
1545 reason = ''.join((
1546 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1547 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1548 'has already been recorded in the archive'))
1549 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1550 else:
1551 try:
1552 reason = check_filter()
1553 except DownloadCancelled as e:
1554 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1555 else:
1556 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1557 if reason is not None:
1558 if not silent:
1559 self.to_screen('[download] ' + reason)
1560 if self.params.get(break_opt, False):
1561 raise break_err()
1562 return reason
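# A minimal sketch of a custom `match_filter` callable as consumed above
# (hypothetical example; `utils.match_filter_func` builds such callables from
# --match-filters expressions). Returning None accepts the entry, a string
# rejects it with that message, and NO_DEFAULT triggers the interactive prompt:
#   def only_short_videos(info_dict, *, incomplete=False):
#       duration = info_dict.get('duration')
#       if not incomplete and duration and duration > 600:
#           return 'Skipping: longer than 10 minutes'
#       return None  # accept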
1564 @staticmethod
1565 def add_extra_info(info_dict, extra_info):
1566 """Set the keys from extra_info in info dict if they are missing"""
1567 for key, value in extra_info.items():
1568 info_dict.setdefault(key, value)
1570 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1571 process=True, force_generic_extractor=False):
1572 """
1573 Extract and return the information dictionary of the URL
1575 Arguments:
1576 @param url URL to extract
1578 Keyword arguments:
1579 @param download Whether to download videos
1580 @param process Whether to resolve all unresolved references (URLs, playlist items).
1581 Must be True for download to work
1582 @param ie_key Use only the extractor with this key
1584 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1585 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1586 """
1588 if extra_info is None:
1589 extra_info = {}
1591 if not ie_key and force_generic_extractor:
1592 ie_key = 'Generic'
1594 if ie_key:
1595 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1596 else:
1597 ies = self._ies
1599 for key, ie in ies.items():
1600 if not ie.suitable(url):
1601 continue
1603 if not ie.working():
1604 self.report_warning('The program functionality for this site has been marked as broken, '
1605 'and will probably not work.')
1607 temp_id = ie.get_temp_id(url)
1608 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1609 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1610 'has already been recorded in the archive')
1611 if self.params.get('break_on_existing', False):
1612 raise ExistingVideoReached
1613 break
1614 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1615 else:
1616 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1617 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1618 tb=False if extractors_restricted else None)
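# A minimal usage sketch (the URLs are hypothetical placeholders):
#   info = ydl.extract_info('https://example.com/watch?v=abc123', download=False)
#   info = ydl.extract_info('https://example.com/file.mp4', ie_key='Generic')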
1620 def _handle_extraction_exceptions(func):
1621 @functools.wraps(func)
1622 def wrapper(self, *args, **kwargs):
1623 while True:
1624 try:
1625 return func(self, *args, **kwargs)
1626 except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1627 raise
1628 except ReExtractInfo as e:
1629 if e.expected:
1630 self.to_screen(f'{e}; Re-extracting data')
1631 else:
1632 self.to_stderr('\r')
1633 self.report_warning(f'{e}; Re-extracting data')
1634 continue
1635 except GeoRestrictedError as e:
1636 msg = e.msg
1637 if e.countries:
1638 msg += '\nThis video is available in {}.'.format(', '.join(
1639 map(ISO3166Utils.short2full, e.countries)))
1640 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1641 self.report_error(msg)
1642 except ExtractorError as e: # An error we somewhat expected
1643 self.report_error(str(e), e.format_traceback())
1644 except Exception as e:
1645 if self.params.get('ignoreerrors'):
1646 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1647 else:
1648 raise
1649 break
1650 return wrapper
1652 def _wait_for_video(self, ie_result={}):
1653 if (not self.params.get('wait_for_video')
1654 or ie_result.get('_type', 'video') != 'video'
1655 or ie_result.get('formats') or ie_result.get('url')):
1656 return
1658 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1659 last_msg = ''
1661 def progress(msg):
1662 nonlocal last_msg
1663 full_msg = f'{msg}\n'
1664 if not self.params.get('noprogress'):
1665 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1666 elif last_msg:
1667 return
1668 self.to_screen(full_msg, skip_eol=True)
1669 last_msg = msg
1671 min_wait, max_wait = self.params.get('wait_for_video')
1672 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1673 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1674 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1675 self.report_warning('Release time of video is not known')
1676 elif ie_result and (diff or 0) <= 0:
1677 self.report_warning('Video should already be available according to extracted info')
1678 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1679 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1681 wait_till = time.time() + diff
1682 try:
1683 while True:
1684 diff = wait_till - time.time()
1685 if diff <= 0:
1686 progress('')
1687 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1688 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1689 time.sleep(1)
1690 except KeyboardInterrupt:
1691 progress('')
1692 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1693 except BaseException as e:
1694 if not isinstance(e, ReExtractInfo):
1695 self.to_screen('')
1696 raise
1698 def _load_cookies(self, data, *, autoscope=True):
1699 """Loads cookies from a `Cookie` header
1701 This tries to work around the security vulnerability of passing cookies to every domain.
1702 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1704 @param data The Cookie header as string to load the cookies from
1705 @param autoscope If `False`, scope cookies using Set-Cookie syntax and raise an error for cookies without a domain
1706 If `True`, save cookies for later to be stored in the jar with a limited scope
1707 If a URL, save cookies in the jar with the domain of the URL
1708 """
1709 for cookie in LenientSimpleCookie(data).values():
1710 if autoscope and any(cookie.values()):
1711 raise ValueError('Invalid syntax in Cookie Header')
1713 domain = cookie.get('domain') or ''
1714 expiry = cookie.get('expires')
1715 if expiry == '': # 0 is valid
1716 expiry = None
1717 prepared_cookie = http.cookiejar.Cookie(
1718 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1719 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1720 cookie.get('secure') or False, expiry, False, None, None, {})
1722 if domain:
1723 self.cookiejar.set_cookie(prepared_cookie)
1724 elif autoscope is True:
1725 self.deprecated_feature(
1726 'Passing cookies as a header is a potential security risk; '
1727 'they will be scoped to the domain of the downloaded URLs. '
1728 'Please consider loading cookies from a file or browser instead.')
1729 self.__header_cookies.append(prepared_cookie)
1730 elif autoscope:
1731 self.report_warning(
1732 'The extractor result contains an unscoped cookie as an HTTP header. '
1733 f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1734 only_once=True)
1735 self._apply_header_cookies(autoscope, [prepared_cookie])
1736 else:
1737 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1738 tb=False, is_error=False)
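# A sketch of the two scoping modes of this private helper (header values are
# hypothetical):
#   ydl._load_cookies('name=value; other=2', autoscope=True)
#   # -> deferred; later scoped to the downloaded URL's domain (deprecation warning)
#   ydl._load_cookies('name=value; Domain=.example.com; Path=/; Secure', autoscope=False)
#   # -> Set-Cookie style syntax; must carry its own Domain to be accepted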
1740 def _apply_header_cookies(self, url, cookies=None):
1741 """Applies stray header cookies to the provided url
1743 This loads header cookies and scopes them to the domain provided in `url`.
1744 While this is not ideal, it helps reduce the risk of them being sent
1745 to an unintended destination while mostly maintaining compatibility.
1746 """
1747 parsed = urllib.parse.urlparse(url)
1748 if not parsed.hostname:
1749 return
1751 for cookie in map(copy.copy, cookies or self.__header_cookies):
1752 cookie.domain = f'.{parsed.hostname}'
1753 self.cookiejar.set_cookie(cookie)
1755 @_handle_extraction_exceptions
1756 def __extract_info(self, url, ie, download, extra_info, process):
1757 self._apply_header_cookies(url)
1759 try:
1760 ie_result = ie.extract(url)
1761 except UserNotLive as e:
1762 if process:
1763 if self.params.get('wait_for_video'):
1764 self.report_warning(e)
1765 self._wait_for_video()
1766 raise
1767 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1768 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1769 return
1770 if isinstance(ie_result, list):
1771 # Backwards compatibility: old IE result format
1772 ie_result = {
1773 '_type': 'compat_list',
1774 'entries': ie_result,
1775 }
1776 if extra_info.get('original_url'):
1777 ie_result.setdefault('original_url', extra_info['original_url'])
1778 self.add_default_extra_info(ie_result, ie, url)
1779 if process:
1780 self._wait_for_video(ie_result)
1781 return self.process_ie_result(ie_result, download, extra_info)
1782 else:
1783 return ie_result
1785 def add_default_extra_info(self, ie_result, ie, url):
1786 if url is not None:
1787 self.add_extra_info(ie_result, {
1788 'webpage_url': url,
1789 'original_url': url,
1790 })
1791 webpage_url = ie_result.get('webpage_url')
1792 if webpage_url:
1793 self.add_extra_info(ie_result, {
1794 'webpage_url_basename': url_basename(webpage_url),
1795 'webpage_url_domain': get_domain(webpage_url),
1796 })
1797 if ie is not None:
1798 self.add_extra_info(ie_result, {
1799 'extractor': ie.IE_NAME,
1800 'extractor_key': ie.ie_key(),
1801 })
1803 def process_ie_result(self, ie_result, download=True, extra_info=None):
1804 """
1805 Take the result of the ie (may be modified) and resolve all unresolved
1806 references (URLs, playlist items).
1808 It will also download the videos if 'download'.
1809 Returns the resolved ie_result.
1810 """
1811 if extra_info is None:
1812 extra_info = {}
1813 result_type = ie_result.get('_type', 'video')
1815 if result_type in ('url', 'url_transparent'):
1816 ie_result['url'] = sanitize_url(
1817 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1818 if ie_result.get('original_url') and not extra_info.get('original_url'):
1819 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1821 extract_flat = self.params.get('extract_flat', False)
1822 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1823 or extract_flat is True):
1824 info_copy = ie_result.copy()
1825 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1826 if ie and not ie_result.get('id'):
1827 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1828 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1829 self.add_extra_info(info_copy, extra_info)
1830 info_copy, _ = self.pre_process(info_copy)
1831 self._fill_common_fields(info_copy, False)
1832 self.__forced_printings(info_copy)
1833 self._raise_pending_errors(info_copy)
1834 if self.params.get('force_write_download_archive', False):
1835 self.record_download_archive(info_copy)
1836 return ie_result
1838 if result_type == 'video':
1839 self.add_extra_info(ie_result, extra_info)
1840 ie_result = self.process_video_result(ie_result, download=download)
1841 self._raise_pending_errors(ie_result)
1842 additional_urls = (ie_result or {}).get('additional_urls')
1843 if additional_urls:
1844 # TODO: Improve MetadataParserPP to allow setting a list
1845 if isinstance(additional_urls, str):
1846 additional_urls = [additional_urls]
1847 self.to_screen(
1848 '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
1849 self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
1850 ie_result['additional_entries'] = [
1851 self.extract_info(
1852 url, download, extra_info=extra_info,
1853 force_generic_extractor=self.params.get('force_generic_extractor'))
1854 for url in additional_urls
1855 ]
1856 return ie_result
1857 elif result_type == 'url':
1858 # We have to add extra_info to the results because it may be
1859 # contained in a playlist
1860 return self.extract_info(
1861 ie_result['url'], download,
1862 ie_key=ie_result.get('ie_key'),
1863 extra_info=extra_info)
1864 elif result_type == 'url_transparent':
1865 # Use the information from the embedding page
1866 info = self.extract_info(
1867 ie_result['url'], ie_key=ie_result.get('ie_key'),
1868 extra_info=extra_info, download=False, process=False)
1870 # extract_info may return None when ignoreerrors is enabled and
1871 # extraction failed with an error, don't crash and return early
1872 # in this case
1873 if not info:
1874 return info
1876 exempted_fields = {'_type', 'url', 'ie_key'}
1877 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1878 # For video clips, the id etc of the clip extractor should be used
1879 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1881 new_result = info.copy()
1882 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1884 # Extracted info may not be a video result (i.e.
1885 # info.get('_type', 'video') != video) but rather an url or
1886 # url_transparent. In such cases outer metadata (from ie_result)
1887 # should be propagated to inner one (info). For this to happen
1888 # _type of info should be overridden with url_transparent. This
1889 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1890 if new_result.get('_type') == 'url':
1891 new_result['_type'] = 'url_transparent'
1893 return self.process_ie_result(
1894 new_result, download=download, extra_info=extra_info)
1895 elif result_type in ('playlist', 'multi_video'):
1896 # Protect from infinite recursion due to recursively nested playlists
1897 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1898 webpage_url = ie_result.get('webpage_url') # Playlists may not have a webpage_url
1899 if webpage_url and webpage_url in self._playlist_urls:
1900 self.to_screen(
1901 '[download] Skipping already downloaded playlist: {}'.format(
1902 ie_result.get('title') or ie_result.get('id')))
1903 return
1905 self._playlist_level += 1
1906 self._playlist_urls.add(webpage_url)
1907 self._fill_common_fields(ie_result, False)
1908 self._sanitize_thumbnails(ie_result)
1909 try:
1910 return self.__process_playlist(ie_result, download)
1911 finally:
1912 self._playlist_level -= 1
1913 if not self._playlist_level:
1914 self._playlist_urls.clear()
1915 elif result_type == 'compat_list':
1916 self.report_warning(
1917 'Extractor {} returned a compat_list result. '
1918 'It needs to be updated.'.format(ie_result.get('extractor')))
1920 def _fixup(r):
1921 self.add_extra_info(r, {
1922 'extractor': ie_result['extractor'],
1923 'webpage_url': ie_result['webpage_url'],
1924 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1925 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1926 'extractor_key': ie_result['extractor_key'],
1927 })
1928 return r
1929 ie_result['entries'] = [
1930 self.process_ie_result(_fixup(r), download, extra_info)
1931 for r in ie_result['entries']
1932 ]
1933 return ie_result
1934 else:
1935 raise Exception(f'Invalid result type: {result_type}')
1937 def _ensure_dir_exists(self, path):
1938 return make_dir(path, self.report_error)
1940 @staticmethod
1941 def _playlist_infodict(ie_result, strict=False, **kwargs):
1942 info = {
1943 'playlist_count': ie_result.get('playlist_count'),
1944 'playlist': ie_result.get('title') or ie_result.get('id'),
1945 'playlist_id': ie_result.get('id'),
1946 'playlist_title': ie_result.get('title'),
1947 'playlist_uploader': ie_result.get('uploader'),
1948 'playlist_uploader_id': ie_result.get('uploader_id'),
1949 'playlist_channel': ie_result.get('channel'),
1950 'playlist_channel_id': ie_result.get('channel_id'),
1951 **kwargs,
1952 }
1953 if strict:
1954 return info
1955 if ie_result.get('webpage_url'):
1956 info.update({
1957 'webpage_url': ie_result['webpage_url'],
1958 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1959 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1960 })
1961 return {
1962 **info,
1963 'playlist_index': 0,
1964 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1965 'extractor': ie_result['extractor'],
1966 'extractor_key': ie_result['extractor_key'],
1967 }
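# The resulting dict backs the playlist-scoped output-template fields, e.g. the
# hypothetical template '%(playlist_title)s/%(playlist_index)03d - %(title)s.%(ext)s'.
# Note that 'playlist_index' is 0 here; the real per-entry value is filled in
# during __process_playlist below.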
1969 def __process_playlist(self, ie_result, download):
1970 """Process each entry in the playlist"""
1971 assert ie_result['_type'] in ('playlist', 'multi_video')
1973 common_info = self._playlist_infodict(ie_result, strict=True)
1974 title = common_info.get('playlist') or '<Untitled>'
1975 if self._match_entry(common_info, incomplete=True) is not None:
1976 return
1977 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1979 all_entries = PlaylistEntries(self, ie_result)
1980 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1982 lazy = self.params.get('lazy_playlist')
1983 if lazy:
1984 resolved_entries, n_entries = [], 'N/A'
1985 ie_result['requested_entries'], ie_result['entries'] = None, None
1986 else:
1987 entries = resolved_entries = list(entries)
1988 n_entries = len(resolved_entries)
1989 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1990 if not ie_result.get('playlist_count'):
1991 # Better to do this after potentially exhausting entries
1992 ie_result['playlist_count'] = all_entries.get_full_count()
1994 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1995 ie_copy = collections.ChainMap(ie_result, extra)
1997 _infojson_written = False
1998 write_playlist_files = self.params.get('allow_playlist_files', True)
1999 if write_playlist_files and self.params.get('list_thumbnails'):
2000 self.list_thumbnails(ie_result)
2001 if write_playlist_files and not self.params.get('simulate'):
2002 _infojson_written = self._write_info_json(
2003 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
2004 if _infojson_written is None:
2005 return
2006 if self._write_description('playlist', ie_result,
2007 self.prepare_filename(ie_copy, 'pl_description')) is None:
2008 return
2009 # TODO: This should be passed to ThumbnailsConvertor if necessary
2010 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
2012 if lazy:
2013 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
2014 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
2015 elif self.params.get('playlistreverse'):
2016 entries.reverse()
2017 elif self.params.get('playlistrandom'):
2018 random.shuffle(entries)
2020 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
2021 f'{format_field(ie_result, "playlist_count", " of %s")}')
2023 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
2024 if self.params.get('extract_flat') == 'discard_in_playlist':
2025 keep_resolved_entries = ie_result['_type'] != 'playlist'
2026 if keep_resolved_entries:
2027 self.write_debug('The information of all playlist entries will be held in memory')
2029 failures = 0
2030 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
2031 for i, (playlist_index, entry) in enumerate(entries):
2032 if lazy:
2033 resolved_entries.append((playlist_index, entry))
2034 if not entry:
2035 continue
2037 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
2038 if not lazy and 'playlist-index' in self.params['compat_opts']:
2039 playlist_index = ie_result['requested_entries'][i]
2041 entry_copy = collections.ChainMap(entry, {
2042 **common_info,
2043 'n_entries': int_or_none(n_entries),
2044 'playlist_index': playlist_index,
2045 'playlist_autonumber': i + 1,
2046 })
2048 if self._match_entry(entry_copy, incomplete=True) is not None:
2049 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
2050 resolved_entries[i] = (playlist_index, NO_DEFAULT)
2051 continue
2053 self.to_screen(
2054 f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
2055 f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
2057 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2058 'playlist_index': playlist_index,
2059 'playlist_autonumber': i + 1,
2060 }, extra))
2061 if not entry_result:
2062 failures += 1
2063 if failures >= max_failures:
2064 self.report_error(
2065 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2066 break
2067 if keep_resolved_entries:
2068 resolved_entries[i] = (playlist_index, entry_result)
2070 # Update with processed data
2071 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2072 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2073 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2074 # Do not set for full playlist
2075 ie_result.pop('requested_entries')
2077 # Write the updated info to json
2078 if _infojson_written is True and self._write_info_json(
2079 'updated playlist', ie_result,
2080 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2081 return
2083 ie_result = self.run_all_pps('playlist', ie_result)
2084 self.to_screen(f'[download] Finished downloading playlist: {title}')
2085 return ie_result
2087 @_handle_extraction_exceptions
2088 def __process_iterable_entry(self, entry, download, extra_info):
2089 return self.process_ie_result(
2090 entry, download=download, extra_info=extra_info)
2092 def _build_format_filter(self, filter_spec):
2093 " Returns a function to filter the formats according to the filter_spec "
2095 OPERATORS = {
2096 '<': operator.lt,
2097 '<=': operator.le,
2098 '>': operator.gt,
2099 '>=': operator.ge,
2100 '=': operator.eq,
2101 '!=': operator.ne,
2102 }
2103 operator_rex = re.compile(r'''(?x)\s*
2104 (?P<key>[\w.-]+)\s*
2105 (?P<op>{})(?P<none_inclusive>\s*\?)?\s*
2106 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2107 '''.format('|'.join(map(re.escape, OPERATORS.keys()))))
2108 m = operator_rex.fullmatch(filter_spec)
2109 if m:
2110 try:
2111 comparison_value = int(m.group('value'))
2112 except ValueError:
2113 comparison_value = parse_filesize(m.group('value'))
2114 if comparison_value is None:
2115 comparison_value = parse_filesize(m.group('value') + 'B')
2116 if comparison_value is None:
2117 raise ValueError(
2118 'Invalid value {!r} in format specification {!r}'.format(
2119 m.group('value'), filter_spec))
2120 op = OPERATORS[m.group('op')]
2122 if not m:
2123 STR_OPERATORS = {
2124 '=': operator.eq,
2125 '^=': lambda attr, value: attr.startswith(value),
2126 '$=': lambda attr, value: attr.endswith(value),
2127 '*=': lambda attr, value: value in attr,
2128 '~=': lambda attr, value: value.search(attr) is not None,
2129 }
2130 str_operator_rex = re.compile(r'''(?x)\s*
2131 (?P<key>[a-zA-Z0-9._-]+)\s*
2132 (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
2133 (?P<quote>["'])?
2134 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2135 (?(quote)(?P=quote))\s*
2136 '''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))
2137 m = str_operator_rex.fullmatch(filter_spec)
2138 if m:
2139 if m.group('op') == '~=':
2140 comparison_value = re.compile(m.group('value'))
2141 else:
2142 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2143 str_op = STR_OPERATORS[m.group('op')]
2144 if m.group('negation'):
2145 op = lambda attr, value: not str_op(attr, value)
2146 else:
2147 op = str_op
2149 if not m:
2150 raise SyntaxError(f'Invalid filter specification {filter_spec!r}')
2152 def _filter(f):
2153 actual_value = f.get(m.group('key'))
2154 if actual_value is None:
2155 return m.group('none_inclusive')
2156 return op(actual_value, comparison_value)
2157 return _filter
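# Minimal illustrations of the two filter grammars parsed above (specs are
# hypothetical):
#   f = ydl._build_format_filter('height<=?480')  # numeric ops: <, <=, >, >=, =, !=;
#                                                 # '?' also keeps formats missing the field
#   g = ydl._build_format_filter('ext=mp4')       # string ops: =, ^=, $=, *=, ~= (prefix ! negates)
#   usable = [fmt for fmt in formats if f(fmt) and g(fmt)]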
2159 def _check_formats(self, formats):
2160 for f in formats:
2161 working = f.get('__working')
2162 if working is not None:
2163 if working:
2164 yield f
2165 continue
2166 self.to_screen('[info] Testing format {}'.format(f['format_id']))
2167 path = self.get_output_path('temp')
2168 if not self._ensure_dir_exists(f'{path}/'):
2169 continue
2170 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2171 temp_file.close()
2172 try:
2173 success, _ = self.dl(temp_file.name, f, test=True)
2174 except (DownloadError, OSError, ValueError, *network_exceptions):
2175 success = False
2176 finally:
2177 if os.path.exists(temp_file.name):
2178 try:
2179 os.remove(temp_file.name)
2180 except OSError:
2181 self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
2182 f['__working'] = success
2183 if success:
2184 yield f
2185 else:
2186 self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
2188 def _select_formats(self, formats, selector):
2189 return list(selector({
2190 'formats': formats,
2191 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2192 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2193 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2194 }))
2196 def _default_format_spec(self, info_dict):
2197 prefer_best = (
2198 self.params['outtmpl']['default'] == '-'
2199 or info_dict.get('is_live') and not self.params.get('live_from_start'))
2201 def can_merge():
2202 merger = FFmpegMergerPP(self)
2203 return merger.available and merger.can_merge()
2205 if not prefer_best and not can_merge():
2206 prefer_best = True
2207 formats = self._get_formats(info_dict)
2208 evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
2209 if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
2210 self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
2211 'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
2213 compat = (self.params.get('allow_multiple_audio_streams')
2214 or 'format-spec' in self.params['compat_opts'])
2216 return ('best/bestvideo+bestaudio' if prefer_best
2217 else 'bestvideo+bestaudio/best' if compat
2218 else 'bestvideo*+bestaudio/best')
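# The three possible defaults, in the order the checks above produce them:
#   'best/bestvideo+bestaudio'   when writing to stdout, for live streams
#                                without --live-from-start, or when ffmpeg
#                                cannot merge
#   'bestvideo+bestaudio/best'   with --audio-multistreams or
#                                --compat-options format-spec
#   'bestvideo*+bestaudio/best'  otherwise (the usual default)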
2220 def build_format_selector(self, format_spec):
2221 def syntax_error(note, start):
2222 message = (
2223 'Invalid format specification: '
2224 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2225 return SyntaxError(message)
2227 PICKFIRST = 'PICKFIRST'
2228 MERGE = 'MERGE'
2229 SINGLE = 'SINGLE'
2230 GROUP = 'GROUP'
2231 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2233 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2234 'video': self.params.get('allow_multiple_video_streams', False)}
2236 def _parse_filter(tokens):
2237 filter_parts = []
2238 for type_, string_, _start, _, _ in tokens:
2239 if type_ == tokenize.OP and string_ == ']':
2240 return ''.join(filter_parts)
2241 else:
2242 filter_parts.append(string_)
2244 def _remove_unused_ops(tokens):
2245 # Remove operators that we don't use and join them with the surrounding strings.
2246 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2247 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2248 last_string, last_start, last_end, last_line = None, None, None, None
2249 for type_, string_, start, end, line in tokens:
2250 if type_ == tokenize.OP and string_ == '[':
2251 if last_string:
2252 yield tokenize.NAME, last_string, last_start, last_end, last_line
2253 last_string = None
2254 yield type_, string_, start, end, line
2255 # everything inside brackets will be handled by _parse_filter
2256 for type_, string_, start, end, line in tokens:
2257 yield type_, string_, start, end, line
2258 if type_ == tokenize.OP and string_ == ']':
2259 break
2260 elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
2261 if last_string:
2262 yield tokenize.NAME, last_string, last_start, last_end, last_line
2263 last_string = None
2264 yield type_, string_, start, end, line
2265 elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2266 if not last_string:
2267 last_string = string_
2268 last_start = start
2269 last_end = end
2270 else:
2271 last_string += string_
2272 if last_string:
2273 yield tokenize.NAME, last_string, last_start, last_end, last_line
2275 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2276 selectors = []
2277 current_selector = None
2278 for type_, string_, start, _, _ in tokens:
2279 # ENCODING is only defined in Python 3.x
2280 if type_ == getattr(tokenize, 'ENCODING', None):
2281 continue
2282 elif type_ in [tokenize.NAME, tokenize.NUMBER]:
2283 current_selector = FormatSelector(SINGLE, string_, [])
2284 elif type_ == tokenize.OP:
2285 if string_ == ')':
2286 if not inside_group:
2287 # ')' will be handled by the parentheses group
2288 tokens.restore_last_token()
2289 break
2290 elif inside_merge and string_ in ['/', ',']:
2291 tokens.restore_last_token()
2292 break
2293 elif inside_choice and string_ == ',':
2294 tokens.restore_last_token()
2295 break
2296 elif string_ == ',':
2297 if not current_selector:
2298 raise syntax_error('"," must follow a format selector', start)
2299 selectors.append(current_selector)
2300 current_selector = None
2301 elif string_ == '/':
2302 if not current_selector:
2303 raise syntax_error('"/" must follow a format selector', start)
2304 first_choice = current_selector
2305 second_choice = _parse_format_selection(tokens, inside_choice=True)
2306 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2307 elif string_ == '[':
2308 if not current_selector:
2309 current_selector = FormatSelector(SINGLE, 'best', [])
2310 format_filter = _parse_filter(tokens)
2311 current_selector.filters.append(format_filter)
2312 elif string_ == '(':
2313 if current_selector:
2314 raise syntax_error('Unexpected "("', start)
2315 group = _parse_format_selection(tokens, inside_group=True)
2316 current_selector = FormatSelector(GROUP, group, [])
2317 elif string_ == '+':
2318 if not current_selector:
2319 raise syntax_error('Unexpected "+"', start)
2320 selector_1 = current_selector
2321 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2322 if not selector_2:
2323 raise syntax_error('Expected a selector', start)
2324 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2325 else:
2326 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2327 elif type_ == tokenize.ENDMARKER:
2328 break
2329 if current_selector:
2330 selectors.append(current_selector)
2331 return selectors
2333 def _merge(formats_pair):
2334 format_1, format_2 = formats_pair
2336 formats_info = []
2337 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2338 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2340 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2341 get_no_more = {'video': False, 'audio': False}
2342 for (i, fmt_info) in enumerate(formats_info):
2343 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2344 formats_info.pop(i)
2345 continue
2346 for aud_vid in ['audio', 'video']:
2347 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2348 if get_no_more[aud_vid]:
2349 formats_info.pop(i)
2350 break
2351 get_no_more[aud_vid] = True
2353 if len(formats_info) == 1:
2354 return formats_info[0]
2356 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2357 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2359 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2360 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2362 output_ext = get_compatible_ext(
2363 vcodecs=[f.get('vcodec') for f in video_fmts],
2364 acodecs=[f.get('acodec') for f in audio_fmts],
2365 vexts=[f['ext'] for f in video_fmts],
2366 aexts=[f['ext'] for f in audio_fmts],
2367 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2368 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2370 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2372 new_dict = {
2373 'requested_formats': formats_info,
2374 'format': '+'.join(filtered('format')),
2375 'format_id': '+'.join(filtered('format_id')),
2376 'ext': output_ext,
2377 'protocol': '+'.join(map(determine_protocol, formats_info)),
2378 'language': '+'.join(orderedSet(filtered('language'))) or None,
2379 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2380 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2381 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2382 }
2384 if the_only_video:
2385 new_dict.update({
2386 'width': the_only_video.get('width'),
2387 'height': the_only_video.get('height'),
2388 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2389 'fps': the_only_video.get('fps'),
2390 'dynamic_range': the_only_video.get('dynamic_range'),
2391 'vcodec': the_only_video.get('vcodec'),
2392 'vbr': the_only_video.get('vbr'),
2393 'stretched_ratio': the_only_video.get('stretched_ratio'),
2394 'aspect_ratio': the_only_video.get('aspect_ratio'),
2395 })
2397 if the_only_audio:
2398 new_dict.update({
2399 'acodec': the_only_audio.get('acodec'),
2400 'abr': the_only_audio.get('abr'),
2401 'asr': the_only_audio.get('asr'),
2402 'audio_channels': the_only_audio.get('audio_channels'),
2403 })
2405 return new_dict
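# Illustrative shape of a merged pair (format IDs are hypothetical): merging a
# video-only format '137' (mp4) with an audio-only format '140' (m4a) yields
# roughly
#   {'requested_formats': [<fmt 137>, <fmt 140>], 'format_id': '137+140',
#    'ext': 'mp4', 'protocol': 'https+https', ...}
# with the single-stream fields (width/height/vcodec..., acodec/abr/...) copied
# from the only-video and only-audio formats respectively.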
2407 def _check_formats(formats):
2408 if self.params.get('check_formats') == 'selected':
2409 yield from self._check_formats(formats)
2410 return
2411 elif (self.params.get('check_formats') is not None
2412 or self.params.get('allow_unplayable_formats')):
2413 yield from formats
2414 return
2416 for f in formats:
2417 if f.get('has_drm') or f.get('__needs_testing'):
2418 yield from self._check_formats([f])
2419 else:
2420 yield f
2422 def _build_selector_function(selector):
2423 if isinstance(selector, list): # ,
2424 fs = [_build_selector_function(s) for s in selector]
2426 def selector_function(ctx):
2427 for f in fs:
2428 yield from f(ctx)
2429 return selector_function
2431 elif selector.type == GROUP: # ()
2432 selector_function = _build_selector_function(selector.selector)
2434 elif selector.type == PICKFIRST: # /
2435 fs = [_build_selector_function(s) for s in selector.selector]
2437 def selector_function(ctx):
2438 for f in fs:
2439 picked_formats = list(f(ctx))
2440 if picked_formats:
2441 return picked_formats
2442 return []
2444 elif selector.type == MERGE: # +
2445 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2447 def selector_function(ctx):
2448 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2449 yield _merge(pair)
2451 elif selector.type == SINGLE: # atom
2452 format_spec = selector.selector or 'best'
2454 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2455 if format_spec == 'all':
2456 def selector_function(ctx):
2457 yield from _check_formats(ctx['formats'][::-1])
2458 elif format_spec == 'mergeall':
2459 def selector_function(ctx):
2460 formats = list(_check_formats(
2461 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2462 if not formats:
2463 return
2464 merged_format = formats[-1]
2465 for f in formats[-2::-1]:
2466 merged_format = _merge((merged_format, f))
2467 yield merged_format
2469 else:
2470 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2471 mobj = re.match(
2472 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2473 format_spec)
2474 if mobj is not None:
2475 format_idx = int_or_none(mobj.group('n'), default=1)
2476 format_reverse = mobj.group('bw')[0] == 'b'
2477 format_type = (mobj.group('type') or [None])[0]
2478 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2479 format_modified = mobj.group('mod') is not None
2481 format_fallback = not format_type and not format_modified # for b, w
2482 _filter_f = (
2483 (lambda f: f.get(f'{format_type}codec') != 'none')
2484 if format_type and format_modified # bv*, ba*, wv*, wa*
2485 else (lambda f: f.get(f'{not_format_type}codec') == 'none')
2486 if format_type # bv, ba, wv, wa
2487 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2488 if not format_modified # b, w
2489 else lambda f: True) # b*, w*
2490 filter_f = lambda f: _filter_f(f) and (
2491 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2492 else:
2493 if format_spec in self._format_selection_exts['audio']:
2494 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2495 elif format_spec in self._format_selection_exts['video']:
2496 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2497 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2498 elif format_spec in self._format_selection_exts['storyboards']:
2499 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2500 else:
2501 filter_f = lambda f: f.get('format_id') == format_spec # id
2503 def selector_function(ctx):
2504 formats = list(ctx['formats'])
2505 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2506 if not matches:
2507 if format_fallback and ctx['incomplete_formats']:
2508 # for extractors with incomplete formats (audio only (soundcloud)
2509 # or video only (imgur)), best/worst will fall back to the
2510 # best/worst {video,audio}-only format
2511 matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
2512 elif separate_fallback and not ctx['has_merged_format']:
2513 # for compatibility with youtube-dl when there is no pre-merged format
2514 matches = list(filter(separate_fallback, formats))
2515 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2516 try:
2517 yield matches[format_idx - 1]
2518 except LazyList.IndexError:
2519 return
2521 filters = [self._build_format_filter(f) for f in selector.filters]
2523 def final_selector(ctx):
2524 ctx_copy = dict(ctx)
2525 for _filter in filters:
2526 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2527 return selector_function(ctx_copy)
2528 return final_selector
2530 # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
2531 # Prefix numbers with random letters to avoid it being classified as a number
2532 # See: https://github.com/yt-dlp/yt-dlp/pulls/8797
2533 # TODO: Implement parser not reliant on tokenize.tokenize
2534 prefix = ''.join(random.choices(string.ascii_letters, k=32))
2535 stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
2536 try:
2537 tokens = list(_remove_unused_ops(
2538 token._replace(string=token.string.replace(prefix, ''))
2539 for token in tokenize.tokenize(stream.readline)))
2540 except tokenize.TokenError:
2541 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2543 class TokenIterator:
2544 def __init__(self, tokens):
2545 self.tokens = tokens
2546 self.counter = 0
2548 def __iter__(self):
2549 return self
2551 def __next__(self):
2552 if self.counter >= len(self.tokens):
2553 raise StopIteration
2554 value = self.tokens[self.counter]
2555 self.counter += 1
2556 return value
2558 next = __next__
2560 def restore_last_token(self):
2561 self.counter -= 1
2563 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2564 return _build_selector_function(parsed_selector)
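# A minimal end-to-end sketch (the spec is a hypothetical example): compile the
# selector once, then apply it to a candidate format list:
#   selector = ydl.build_format_selector('bv*+ba/b')
#   chosen = ydl._select_formats(formats, selector)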
2566 def _calc_headers(self, info_dict, load_cookies=False):
2567 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2568 clean_headers(res)
2570 if load_cookies: # For --load-info-json
2571 self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
2572 self._load_cookies(info_dict.get('cookies'), autoscope=False)
2573 # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2574 # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2575 res.pop('Cookie', None)
2576 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2577 if cookies:
2578 encoder = LenientSimpleCookie()
2579 values = []
2580 for cookie in cookies:
2581 _, value = encoder.value_encode(cookie.value)
2582 values.append(f'{cookie.name}={value}')
2583 if cookie.domain:
2584 values.append(f'Domain={cookie.domain}')
2585 if cookie.path:
2586 values.append(f'Path={cookie.path}')
2587 if cookie.secure:
2588 values.append('Secure')
2589 if cookie.expires:
2590 values.append(f'Expires={cookie.expires}')
2591 if cookie.version:
2592 values.append(f'Version={cookie.version}')
2593 info_dict['cookies'] = '; '.join(values)
2595 if 'X-Forwarded-For' not in res:
2596 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2597 if x_forwarded_for_ip:
2598 res['X-Forwarded-For'] = x_forwarded_for_ip
2600 return res
2602 def _calc_cookies(self, url):
2603 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2604 return self.cookiejar.get_cookie_header(url)
2606 def _sort_thumbnails(self, thumbnails):
2607 thumbnails.sort(key=lambda t: (
2608 t.get('preference') if t.get('preference') is not None else -1,
2609 t.get('width') if t.get('width') is not None else -1,
2610 t.get('height') if t.get('height') is not None else -1,
2611 t.get('id') if t.get('id') is not None else '',
2612 t.get('url')))
2614 def _sanitize_thumbnails(self, info_dict):
2615 thumbnails = info_dict.get('thumbnails')
2616 if thumbnails is None:
2617 thumbnail = info_dict.get('thumbnail')
2618 if thumbnail:
2619 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2620 if not thumbnails:
2621 return
2623 def check_thumbnails(thumbnails):
2624 for t in thumbnails:
2625 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2626 try:
2627 self.urlopen(HEADRequest(t['url']))
2628 except network_exceptions as err:
2629 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2630 continue
2631 yield t
2633 self._sort_thumbnails(thumbnails)
2634 for i, t in enumerate(thumbnails):
2635 if t.get('id') is None:
2636 t['id'] = str(i)
2637 if t.get('width') and t.get('height'):
2638 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2639 t['url'] = sanitize_url(t['url'])
2641 if self.params.get('check_formats') is True:
2642 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2643 else:
2644 info_dict['thumbnails'] = thumbnails
2646 def _fill_common_fields(self, info_dict, final=True):
2647 # TODO: move sanitization here
2648 if final:
2649 title = info_dict['fulltitle'] = info_dict.get('title')
2650 if not title:
2651 if title == '':
2652 self.write_debug('Extractor gave empty title. Creating a generic title')
2653 else:
2654 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2655 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2657 if info_dict.get('duration') is not None:
2658 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2660 for ts_key, date_key in (
2661 ('timestamp', 'upload_date'),
2662 ('release_timestamp', 'release_date'),
2663 ('modified_timestamp', 'modified_date'),
2664 ):
2665 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2666 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2667 # see http://bugs.python.org/issue1646728)
2668 with contextlib.suppress(ValueError, OverflowError, OSError):
2669 upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
2670 info_dict[date_key] = upload_date.strftime('%Y%m%d')
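# (illustration: a hypothetical timestamp 1700000000 yields upload_date
# '20231114'; converting in UTC keeps the result reproducible across machines)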
2672 if not info_dict.get('release_year'):
2673 info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2675 live_keys = ('is_live', 'was_live')
2676 live_status = info_dict.get('live_status')
2677 if live_status is None:
2678 for key in live_keys:
2679 if info_dict.get(key) is False:
2680 continue
2681 if info_dict.get(key):
2682 live_status = key
2683 break
2684 if all(info_dict.get(key) is False for key in live_keys):
2685 live_status = 'not_live'
2686 if live_status:
2687 info_dict['live_status'] = live_status
2688 for key in live_keys:
2689 if info_dict.get(key) is None:
2690 info_dict[key] = (live_status == key)
2691 if live_status == 'post_live':
2692 info_dict['was_live'] = True
2694 # Auto generate title fields corresponding to the *_number fields when missing
2695 # in order to always have clean titles. This is very common for TV series.
2696 for field in ('chapter', 'season', 'episode'):
2697 if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field):
2698 info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number'])
2700 for old_key, new_key in self._deprecated_multivalue_fields.items():
2701 if new_key in info_dict and old_key in info_dict:
2702 if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json
2703 self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
2704 elif old_value := info_dict.get(old_key):
2705 info_dict[new_key] = old_value.split(', ')
2706 elif new_value := info_dict.get(new_key):
2707 info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
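# Illustrative round-trip for one such pair (hypothetical values), assuming
# 'creator' -> 'creators' is among the deprecated multi-value mappings:
#   {'creators': ['A', 'B']} fills in {'creator': 'A, B'}
#   {'creator': 'A, B'}      fills in {'creators': ['A', 'B']}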
2709 def _raise_pending_errors(self, info):
2710 err = info.pop('__pending_error', None)
2711 if err:
2712 self.report_error(err, tb=False)
2714 def sort_formats(self, info_dict):
2715 formats = self._get_formats(info_dict)
2716 formats.sort(key=FormatSorter(
2717 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2719 def process_video_result(self, info_dict, download=True):
2720 assert info_dict.get('_type', 'video') == 'video'
2721 self._num_videos += 1
2723 if 'id' not in info_dict:
2724 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2725 elif not info_dict.get('id'):
2726 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2728 def report_force_conversion(field, field_not, conversion):
2729 self.report_warning(
2730 f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
2731 'there is an error in extractor')
2733 def sanitize_string_field(info, string_field):
2734 field = info.get(string_field)
2735 if field is None or isinstance(field, str):
2736 return
2737 report_force_conversion(string_field, 'a string', 'string')
2738 info[string_field] = str(field)
2740 def sanitize_numeric_fields(info):
2741 for numeric_field in self._NUMERIC_FIELDS:
2742 field = info.get(numeric_field)
2743 if field is None or isinstance(field, (int, float)):
2744 continue
2745 report_force_conversion(numeric_field, 'numeric', 'int')
2746 info[numeric_field] = int_or_none(field)
2748 sanitize_string_field(info_dict, 'id')
2749 sanitize_numeric_fields(info_dict)
2750 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2751 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2752 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2753 self.report_warning('"duration" field is negative, there is an error in extractor')
2755 chapters = info_dict.get('chapters') or []
2756 if chapters and chapters[0].get('start_time'):
2757 chapters.insert(0, {'start_time': 0})
2759 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2760 for idx, (prev, current, next_) in enumerate(zip(
2761 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2762 if current.get('start_time') is None:
2763 current['start_time'] = prev.get('end_time')
2764 if not current.get('end_time'):
2765 current['end_time'] = next_.get('start_time')
2766 if not current.get('title'):
2767 current['title'] = f'<Untitled Chapter {idx}>'
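# Illustrative fix-up (hypothetical input): with duration 300 and chapters
#   [{'start_time': 0}, {'start_time': 60, 'title': 'Intro'}]
# the loop above fills the gaps, giving
#   [{'start_time': 0, 'end_time': 60, 'title': '<Untitled Chapter 1>'},
#    {'start_time': 60, 'end_time': 300, 'title': 'Intro'}]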
2769 if 'playlist' not in info_dict:
2770 # It isn't part of a playlist
2771 info_dict['playlist'] = None
2772 info_dict['playlist_index'] = None
2774 self._sanitize_thumbnails(info_dict)
2776 thumbnail = info_dict.get('thumbnail')
2777 thumbnails = info_dict.get('thumbnails')
2778 if thumbnail:
2779 info_dict['thumbnail'] = sanitize_url(thumbnail)
2780 elif thumbnails:
2781 info_dict['thumbnail'] = thumbnails[-1]['url']
2783 if info_dict.get('display_id') is None and 'id' in info_dict:
2784 info_dict['display_id'] = info_dict['id']
2786 self._fill_common_fields(info_dict)
2788 for cc_kind in ('subtitles', 'automatic_captions'):
2789 cc = info_dict.get(cc_kind)
2790 if cc:
2791 for _, subtitle in cc.items():
2792 for subtitle_format in subtitle:
2793 if subtitle_format.get('url'):
2794 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2795 if subtitle_format.get('ext') is None:
2796 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2798 automatic_captions = info_dict.get('automatic_captions')
2799 subtitles = info_dict.get('subtitles')
2801 info_dict['requested_subtitles'] = self.process_subtitles(
2802 info_dict['id'], subtitles, automatic_captions)
2804 formats = self._get_formats(info_dict)
2806 # Backward compatibility with InfoExtractor._sort_formats
2807 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2808 if field_preference:
2809 info_dict['_format_sort_fields'] = field_preference
2811 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2812 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2813 if not self.params.get('allow_unplayable_formats'):
2814 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2816 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2817 self.report_warning(
2818 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2819 'only images are available for download. Use --list-formats to see them'.capitalize())
2821 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2822 if not get_from_start:
2823 info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
2824 if info_dict.get('is_live') and formats:
2825 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2826 if get_from_start and not formats:
2827 self.raise_no_formats(info_dict, msg=(
2828 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2829 'If you want to download from the current time, use --no-live-from-start'))
2831 def is_wellformed(f):
2832 url = f.get('url')
2833 if not url:
2834 self.report_warning(
2835 '"url" field is missing or empty - skipping format, '
2836 'there is an error in extractor')
2837 return False
2838 if isinstance(url, bytes):
2839 sanitize_string_field(f, 'url')
2840 return True
2842 # Filter out malformed formats for better extraction robustness
2843 formats = list(filter(is_wellformed, formats or []))
2845 if not formats:
2846 self.raise_no_formats(info_dict)
2848 for fmt in formats:
2849 sanitize_string_field(fmt, 'format_id')
2850 sanitize_numeric_fields(fmt)
2851 fmt['url'] = sanitize_url(fmt['url'])
2852 FormatSorter._fill_sorting_fields(fmt)
2853 if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
2854 if fmt.get('acodec') is None:
2855 fmt['acodec'] = fmt['ext']
2856 if fmt.get('resolution') is None:
2857 fmt['resolution'] = self.format_resolution(fmt, default=None)
2858 if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
2859 fmt['dynamic_range'] = 'SDR'
2860 if fmt.get('aspect_ratio') is None:
2861 fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
2862 # For fragmented formats, "tbr" is often max bitrate and not average
2863 if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
2864 and not fmt.get('filesize') and not fmt.get('filesize_approx')):
2865 fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
2866 fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)
2868 # Safeguard against old/insecure infojson when using --load-info-json
2869 if info_dict.get('http_headers'):
2870 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2871 info_dict['http_headers'].pop('Cookie', None)
2873 # This is copied to http_headers by the above _calc_headers and can now be removed
2874 if '__x_forwarded_for_ip' in info_dict:
2875 del info_dict['__x_forwarded_for_ip']
2877 self.sort_formats({
2878 'formats': formats,
2879 '_format_sort_fields': info_dict.get('_format_sort_fields'),
2880 })
2882 # Sanitize and group by format_id
2883 formats_dict = {}
2884 for i, fmt in enumerate(formats):
2885 if not fmt.get('format_id'):
2886 fmt['format_id'] = str(i)
2887 else:
2888 # Sanitize format_id from characters used in format selector expression
2889 fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
2890 formats_dict.setdefault(fmt['format_id'], []).append(fmt)
2892 # Make sure all formats have unique format_id
2893 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2894 for format_id, ambiguous_formats in formats_dict.items():
2895 ambiguous_id = len(ambiguous_formats) > 1
2896 for i, fmt in enumerate(ambiguous_formats):
2897 if ambiguous_id:
2898 fmt['format_id'] = f'{format_id}-{i}'
2899 # Ensure there is no conflict between id and ext in format selection
2900 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2901 if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
2902 fmt['format_id'] = 'f{}'.format(fmt['format_id'])
2904 if fmt.get('format') is None:
2905 fmt['format'] = '{id} - {res}{note}'.format(
2906 id=fmt['format_id'],
2907 res=self.format_resolution(fmt),
2908 note=format_field(fmt, 'format_note', ' (%s)'),
2909 )
2911 if self.params.get('check_formats') is True:
2912 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2914 if not formats or formats[0] is not info_dict:
2915 # only set the 'formats' field if the original info_dict lists them;
2916 # otherwise we end up with a circular reference: the first (and only)
2917 # element of the 'formats' field in info_dict would be info_dict itself,
2918 # which can't be exported to JSON
2919 info_dict['formats'] = formats
2921 info_dict, _ = self.pre_process(info_dict)
2923 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2924 return info_dict
2926 self.post_extract(info_dict)
2927 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2929 # The pre-processors may have modified the formats
2930 formats = self._get_formats(info_dict)
2932 list_only = self.params.get('simulate') == 'list_only'
2933 interactive_format_selection = not list_only and self.format_selector == '-'
2934 if self.params.get('list_thumbnails'):
2935 self.list_thumbnails(info_dict)
2936 if self.params.get('listsubtitles'):
2937 if 'automatic_captions' in info_dict:
2938 self.list_subtitles(
2939 info_dict['id'], automatic_captions, 'automatic captions')
2940 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2941 if self.params.get('listformats') or interactive_format_selection:
2942 self.list_formats(info_dict)
2943 if list_only:
2944 # Without this printing, -F --print-json will not work
2945 self.__forced_printings(info_dict)
2946 return info_dict
2948 format_selector = self.format_selector
2949 while True:
2950 if interactive_format_selection:
2951 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2952 + '(Press ENTER for default, or Ctrl+C to quit)'
2953 + self._format_screen(': ', self.Styles.EMPHASIS))
2954 try:
2955 format_selector = self.build_format_selector(req_format) if req_format else None
2956 except SyntaxError as err:
2957 self.report_error(err, tb=False, is_error=False)
2958 continue
2960 if format_selector is None:
2961 req_format = self._default_format_spec(info_dict)
2962 self.write_debug(f'Default format spec: {req_format}')
2963 format_selector = self.build_format_selector(req_format)
2965 formats_to_download = self._select_formats(formats, format_selector)
2966 if interactive_format_selection and not formats_to_download:
2967 self.report_error('Requested format is not available', tb=False, is_error=False)
2968 continue
2969 break
2971 if not formats_to_download:
2972 if not self.params.get('ignore_no_formats_error'):
2973 raise ExtractorError(
2974 'Requested format is not available. Use --list-formats for a list of available formats',
2975 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2976 self.report_warning('Requested format is not available')
2977 # Process what we can, even without any available formats.
2978 formats_to_download = [{}]
2980 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2981 best_format, downloaded_formats = formats_to_download[-1], []
2982 if download:
2983 if best_format and requested_ranges:
2984 def to_screen(*msg):
2985 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2987 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2988 (f['format_id'] for f in formats_to_download))
2989 if requested_ranges != ({}, ):
2990 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2991 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2992 max_downloads_reached = False
2994 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2995 new_info = self._copy_infodict(info_dict)
2996 new_info.update(fmt)
2997 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2998 end_time = offset + min(chapter.get('end_time', duration), duration)
2999 # duration may not be accurate. So allow deviations <1sec
3000 if end_time == float('inf') or end_time > offset + duration + 1:
3001 end_time = None
3002 if chapter or offset:
3003 new_info.update({
3004 'section_start': offset + chapter.get('start_time', 0),
3005 'section_end': end_time,
3006 'section_title': chapter.get('title'),
3006 'section_number': chapter.get('index'),
3007 })
3009 downloaded_formats.append(new_info)
3010 try:
3011 self.process_info(new_info)
3012 except MaxDownloadsReached:
3013 max_downloads_reached = True
3014 self._raise_pending_errors(new_info)
3015 # Remove copied info
3016 for key, val in tuple(new_info.items()):
3017 if info_dict.get(key) == val:
3018 new_info.pop(key)
3019 if max_downloads_reached:
3020 break
3022 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
3023 assert write_archive.issubset({True, False, 'ignore'})
3024 if True in write_archive and False not in write_archive:
3025 self.record_download_archive(info_dict)
3027 info_dict['requested_downloads'] = downloaded_formats
3028 info_dict = self.run_all_pps('after_video', info_dict)
3029 if max_downloads_reached:
3030 raise MaxDownloadsReached
3032 # We update the info dict with the selected best quality format (backwards compatibility)
3033 info_dict.update(best_format)
3034 return info_dict
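# The section handling above is driven by the 'download_ranges' param: a
# callable (info_dict, ydl) -> iterable of {'start_time': ..., 'end_time': ...}
# dicts. A minimal usage sketch via the public API (the URL below is
# hypothetical; partial downloads require ffmpeg):
#
#   from yt_dlp import YoutubeDL
#   from yt_dlp.utils import download_range_func
#
#   with YoutubeDL({'download_ranges': download_range_func(None, [(10, 20)])}) as ydl:
#       ydl.download(['https://example.com/watch?v=xxxxxxxx'])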
3036 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
3037 """Select the requested subtitles and their format"""
3038 available_subs, normal_sub_langs = {}, []
3039 if normal_subtitles and self.params.get('writesubtitles'):
3040 available_subs.update(normal_subtitles)
3041 normal_sub_langs = tuple(normal_subtitles.keys())
3042 if automatic_captions and self.params.get('writeautomaticsub'):
3043 for lang, cap_info in automatic_captions.items():
3044 if lang not in available_subs:
3045 available_subs[lang] = cap_info
3047 if not available_subs or (
3048 not self.params.get('writesubtitles')
3049 and not self.params.get('writeautomaticsub')):
3050 return None
3052 all_sub_langs = tuple(available_subs.keys())
3053 if self.params.get('allsubtitles', False):
3054 requested_langs = all_sub_langs
3055 elif self.params.get('subtitleslangs', False):
3056 try:
3057 requested_langs = orderedSet_from_options(
3058 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
3059 except re.error as e:
3060 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
3061 else:
3062 requested_langs = LazyList(itertools.chain(
3063 ['en'] if 'en' in normal_sub_langs else [],
3064 filter(lambda f: f.startswith('en'), normal_sub_langs),
3065 ['en'] if 'en' in all_sub_langs else [],
3066 filter(lambda f: f.startswith('en'), all_sub_langs),
3067 normal_sub_langs, all_sub_langs,
3068 ))[:1]
3069 if requested_langs:
3070 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
3072 formats_query = self.params.get('subtitlesformat', 'best')
3073 formats_preference = formats_query.split('/') if formats_query else []
3074 subs = {}
3075 for lang in requested_langs:
3076 formats = available_subs.get(lang)
3077 if formats is None:
3078 self.report_warning(f'{lang} subtitles not available for {video_id}')
3079 continue
3080 for ext in formats_preference:
3081 if ext == 'best':
3082 f = formats[-1]
3083 break
3084 matches = list(filter(lambda f: f['ext'] == ext, formats))
3085 if matches:
3086 f = matches[-1]
3087 break
3088 else:
3089 f = formats[-1]
3090 self.report_warning(
3091 'No subtitle format found matching "{}" for language {}, '
3092 'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext']))
3093 subs[lang] = f
3094 return subs
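# A sketch of the params consumed by process_subtitles() (values are
# illustrative only):
#
#   params = {
#       'writesubtitles': True,             # manual subtitles
#       'writeautomaticsub': True,          # automatic captions as a fallback pool
#       'subtitleslangs': ['en.*', 'ja'],   # regexes, resolved via orderedSet_from_options
#       'subtitlesformat': 'vtt/srt/best',  # preference chain; first match wins
#   }
#
# With no 'subtitleslangs'/'allsubtitles', the LazyList chain above picks a
# single language, preferring exact 'en', then 'en*' variants, then anything.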
3096 def _forceprint(self, key, info_dict):
3097 if info_dict is None:
3098 return
3099 info_copy = info_dict.copy()
3100 info_copy.setdefault('filename', self.prepare_filename(info_dict))
3101 if info_dict.get('requested_formats') is not None:
3102 # For RTMP URLs, also include the playpath
3103 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3104 elif info_dict.get('url'):
3105 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3106 info_copy['formats_table'] = self.render_formats_table(info_dict)
3107 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3108 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3109 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3111 def format_tmpl(tmpl):
3112 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3113 if not mobj:
3114 return tmpl
3116 fmt = '%({})s'
3117 if tmpl.startswith('{'):
3118 tmpl, fmt = f'.{tmpl}', '%({})j'
3119 if tmpl.endswith('='):
3120 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3121 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3123 for tmpl in self.params['forceprint'].get(key, []):
3124 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3126 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3127 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3128 tmpl = format_tmpl(tmpl)
3129 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3130 if self._ensure_dir_exists(filename):
3131 with open(filename, 'a', encoding='utf-8', newline='') as f:
3132 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3134 return info_copy
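# Illustrative expansions of format_tmpl() above, derived from its regex and
# fmt handling (the shorthands come from --print arguments):
#
#   'title'      -> '%(title)s'
#   'id,title'   -> '%(id)s\n%(title)s'
#   'title='     -> 'title = %(title)#j'   (trailing '=' prints 'name = <json>')
#   '{formats}'  -> '%(.{formats})j'       ('{...}' dumps the selection as JSON)
#
# Anything that doesn't match the shorthand regex is passed through verbatim
# as a regular output template.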
3136 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3137 if (self.params.get('forcejson')
3138 or self.params['forceprint'].get('video')
3139 or self.params['print_to_file'].get('video')):
3140 self.post_extract(info_dict)
3141 if filename:
3142 info_dict['filename'] = filename
3143 info_copy = self._forceprint('video', info_dict)
3145 def print_field(field, actual_field=None, optional=False):
3146 if actual_field is None:
3147 actual_field = field
3148 if self.params.get(f'force{field}') and (
3149 info_copy.get(field) is not None or (not optional and not incomplete)):
3150 self.to_stdout(info_copy[actual_field])
3152 print_field('title')
3153 print_field('id')
3154 print_field('url', 'urls')
3155 print_field('thumbnail', optional=True)
3156 print_field('description', optional=True)
3157 print_field('filename')
3158 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3159 self.to_stdout(formatSeconds(info_copy['duration']))
3160 print_field('format')
3162 if self.params.get('forcejson'):
3163 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3165 def dl(self, name, info, subtitle=False, test=False):
3166 if not info.get('url'):
3167 self.raise_no_formats(info, True)
3169 if test:
3170 verbose = self.params.get('verbose')
3171 quiet = self.params.get('quiet') or not verbose
3172 params = {
3173 'test': True,
3174 'quiet': quiet,
3175 'verbose': verbose,
3176 'noprogress': quiet,
3177 'nopart': True,
3178 'skip_unavailable_fragments': False,
3179 'keep_fragments': False,
3180 'overwrites': True,
3181 '_no_ytdl_file': True,
3182 }
3183 else:
3184 params = self.params
3185 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3186 if not test:
3187 for ph in self._progress_hooks:
3188 fd.add_progress_hook(ph)
3189 urls = '", "'.join(
3190 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3191 for f in info.get('requested_formats', []) or [info])
3192 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3194 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
3195 # But it may contain objects that are not deep-copyable
3196 new_info = self._copy_infodict(info)
3197 if new_info.get('http_headers') is None:
3198 new_info['http_headers'] = self._calc_headers(new_info)
3199 return fd.download(name, new_info, subtitle)
3201 def existing_file(self, filepaths, *, default_overwrite=True):
3202 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3203 if existing_files and not self.params.get('overwrites', default_overwrite):
3204 return existing_files[0]
3206 for file in existing_files:
3207 self.report_file_delete(file)
3208 os.remove(file)
3209 return None
3211 @_catch_unsafe_extension_error
3212 def process_info(self, info_dict):
3213 """Process a single resolved IE result. (Modifies it in-place)"""
3215 assert info_dict.get('_type', 'video') == 'video'
3216 original_infodict = info_dict
3218 if 'format' not in info_dict and 'ext' in info_dict:
3219 info_dict['format'] = info_dict['ext']
3221 if self._match_entry(info_dict) is not None:
3222 info_dict['__write_download_archive'] = 'ignore'
3223 return
3225 # Does nothing under normal operation - for backward compatibility of process_info
3226 self.post_extract(info_dict)
3228 def replace_info_dict(new_info):
3229 nonlocal info_dict
3230 if new_info == info_dict:
3231 return
3232 info_dict.clear()
3233 info_dict.update(new_info)
3235 new_info, _ = self.pre_process(info_dict, 'video')
3236 replace_info_dict(new_info)
3237 self._num_downloads += 1
3239 # info_dict['_filename'] needs to be set for backward compatibility
3240 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3241 temp_filename = self.prepare_filename(info_dict, 'temp')
3242 files_to_move = {}
3244 # Forced printings
3245 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3247 def check_max_downloads():
3248 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3249 raise MaxDownloadsReached
3251 if self.params.get('simulate'):
3252 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3253 check_max_downloads()
3254 return
3256 if full_filename is None:
3257 return
3258 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3259 return
3260 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3261 return
3263 if self._write_description('video', info_dict,
3264 self.prepare_filename(info_dict, 'description')) is None:
3265 return
3267 sub_files = self._write_subtitles(info_dict, temp_filename)
3268 if sub_files is None:
3269 return
3270 files_to_move.update(dict(sub_files))
3272 thumb_files = self._write_thumbnails(
3273 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3274 if thumb_files is None:
3275 return
3276 files_to_move.update(dict(thumb_files))
3278 infofn = self.prepare_filename(info_dict, 'infojson')
3279 _infojson_written = self._write_info_json('video', info_dict, infofn)
3280 if _infojson_written:
3281 info_dict['infojson_filename'] = infofn
3282 # For backward compatibility, even though it was a private field
3283 info_dict['__infojson_filename'] = infofn
3284 elif _infojson_written is None:
3285 return
3287 # Note: Annotations are deprecated
3288 annofn = None
3289 if self.params.get('writeannotations', False):
3290 annofn = self.prepare_filename(info_dict, 'annotation')
3291 if annofn:
3292 if not self._ensure_dir_exists(encodeFilename(annofn)):
3293 return
3294 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3295 self.to_screen('[info] Video annotations are already present')
3296 elif not info_dict.get('annotations'):
3297 self.report_warning('There are no annotations to write.')
3298 else:
3299 try:
3300 self.to_screen('[info] Writing video annotations to: ' + annofn)
3301 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3302 annofile.write(info_dict['annotations'])
3303 except (KeyError, TypeError):
3304 self.report_warning('There are no annotations to write.')
3305 except OSError:
3306 self.report_error('Cannot write annotations file: ' + annofn)
3307 return
3309 # Write internet shortcut files
3310 def _write_link_file(link_type):
3311 url = try_get(info_dict['webpage_url'], iri_to_uri)
3312 if not url:
3313 self.report_warning(
3314 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3315 return True
3316 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3317 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3318 return False
3319 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3320 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3321 return True
3322 try:
3323 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3324 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3325 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3326 template_vars = {'url': url}
3327 if link_type == 'desktop':
3328 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3329 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3330 except OSError:
3331 self.report_error(f'Cannot write internet shortcut {linkfn}')
3332 return False
3333 return True
3335 write_links = {
3336 'url': self.params.get('writeurllink'),
3337 'webloc': self.params.get('writewebloclink'),
3338 'desktop': self.params.get('writedesktoplink'),
3339 }
3340 if self.params.get('writelink'):
3341 link_type = ('webloc' if sys.platform == 'darwin'
3342 else 'desktop' if sys.platform.startswith('linux')
3343 else 'url')
3344 write_links[link_type] = True
3346 if any(should_write and not _write_link_file(link_type)
3347 for link_type, should_write in write_links.items()):
3348 return
3350 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3351 replace_info_dict(new_info)
3353 if self.params.get('skip_download'):
3354 info_dict['filepath'] = temp_filename
3355 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3356 info_dict['__files_to_move'] = files_to_move
3357 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3358 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3359 else:
3360 # Download
3361 info_dict.setdefault('__postprocessors', [])
3362 try:
3364 def existing_video_file(*filepaths):
3365 ext = info_dict.get('ext')
3366 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3367 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3368 default_overwrite=False)
3369 if file:
3370 info_dict['ext'] = os.path.splitext(file)[1][1:]
3371 return file
3373 fd, success = None, True
3374 if info_dict.get('protocol') or info_dict.get('url'):
3375 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3376 if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3377 info_dict.get('section_start') or info_dict.get('section_end')):
3378 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3379 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3380 self.report_error(f'{msg}. Aborting')
3381 return
3383 if info_dict.get('requested_formats') is not None:
3384 old_ext = info_dict['ext']
3385 if self.params.get('merge_output_format') is None:
3386 if (info_dict['ext'] == 'webm'
3387 and info_dict.get('thumbnails')
3388 # check with type instead of pp_key, __name__, or isinstance
3389 # since we don't want any custom PPs to trigger this
3390 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3391 info_dict['ext'] = 'mkv'
3392 self.report_warning(
3393 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3394 new_ext = info_dict['ext']
3396 def correct_ext(filename, ext=new_ext):
3397 if filename == '-':
3398 return filename
3399 filename_real_ext = os.path.splitext(filename)[1][1:]
3400 filename_wo_ext = (
3401 os.path.splitext(filename)[0]
3402 if filename_real_ext in (old_ext, new_ext)
3403 else filename)
3404 return f'{filename_wo_ext}.{ext}'
3406 # Ensure filename always has a correct extension for successful merge
3407 full_filename = correct_ext(full_filename)
3408 temp_filename = correct_ext(temp_filename)
3409 dl_filename = existing_video_file(full_filename, temp_filename)
3411 info_dict['__real_download'] = False
3412 # NOTE: Copy so that original format dicts are not modified
3413 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3415 merger = FFmpegMergerPP(self)
3416 downloaded = []
3417 if dl_filename is not None:
3418 self.report_file_already_downloaded(dl_filename)
3419 elif fd:
3420 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3421 f['filepath'] = fname = prepend_extension(
3422 correct_ext(temp_filename, info_dict['ext']),
3423 'f{}'.format(f['format_id']), info_dict['ext'])
3424 downloaded.append(fname)
3425 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3426 success, real_download = self.dl(temp_filename, info_dict)
3427 info_dict['__real_download'] = real_download
3428 else:
3429 if self.params.get('allow_unplayable_formats'):
3430 self.report_warning(
3431 'You have requested merging of multiple formats '
3432 'while also allowing unplayable formats to be downloaded. '
3433 'The formats won\'t be merged to prevent data corruption.')
3434 elif not merger.available:
3435 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3436 if not self.params.get('ignoreerrors'):
3437 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3438 return
3439 self.report_warning(f'{msg}. The formats won\'t be merged')
3441 if temp_filename == '-':
3442 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3443 else 'but the formats are incompatible for simultaneous download' if merger.available
3444 else 'but ffmpeg is not installed')
3445 self.report_warning(
3446 f'You have requested downloading multiple formats to stdout {reason}. '
3447 'The formats will be streamed one after the other')
3448 fname = temp_filename
3449 for f in info_dict['requested_formats']:
3450 new_info = dict(info_dict)
3451 del new_info['requested_formats']
3452 new_info.update(f)
3453 if temp_filename != '-':
3454 fname = prepend_extension(
3455 correct_ext(temp_filename, new_info['ext']),
3456 'f{}'.format(f['format_id']), new_info['ext'])
3457 if not self._ensure_dir_exists(fname):
3458 return
3459 f['filepath'] = fname
3460 downloaded.append(fname)
3461 partial_success, real_download = self.dl(fname, new_info)
3462 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3463 success = success and partial_success
3465 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3466 info_dict['__postprocessors'].append(merger)
3467 info_dict['__files_to_merge'] = downloaded
3468 # Even if nothing new was downloaded, the merge itself only happens now
3469 info_dict['__real_download'] = True
3470 else:
3471 for file in downloaded:
3472 files_to_move[file] = None
3473 else:
3474 # Just a single file
3475 dl_filename = existing_video_file(full_filename, temp_filename)
3476 if dl_filename is None or dl_filename == temp_filename:
3477 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3478 # So we should try to resume the download
3479 success, real_download = self.dl(temp_filename, info_dict)
3480 info_dict['__real_download'] = real_download
3481 else:
3482 self.report_file_already_downloaded(dl_filename)
3484 dl_filename = dl_filename or temp_filename
3485 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3487 except network_exceptions as err:
3488 self.report_error(f'unable to download video data: {err}')
3489 return
3490 except OSError as err:
3491 raise UnavailableVideoError(err)
3492 except ContentTooShortError as err:
3493 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3494 return
3496 self._raise_pending_errors(info_dict)
3497 if success and full_filename != '-':
3499 def fixup():
3500 do_fixup = True
3501 fixup_policy = self.params.get('fixup')
3502 vid = info_dict['id']
3504 if fixup_policy in ('ignore', 'never'):
3505 return
3506 elif fixup_policy == 'warn':
3507 do_fixup = 'warn'
3508 elif fixup_policy != 'force':
3509 assert fixup_policy in ('detect_or_warn', None)
3510 if not info_dict.get('__real_download'):
3511 do_fixup = False
3513 def ffmpeg_fixup(cndn, msg, cls):
3514 if not (do_fixup and cndn):
3515 return
3516 elif do_fixup == 'warn':
3517 self.report_warning(f'{vid}: {msg}')
3518 return
3519 pp = cls(self)
3520 if pp.available:
3521 info_dict['__postprocessors'].append(pp)
3522 else:
3523 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3525 stretched_ratio = info_dict.get('stretched_ratio')
3526 ffmpeg_fixup(stretched_ratio not in (1, None),
3527 f'Non-uniform pixel ratio {stretched_ratio}',
3528 FFmpegFixupStretchedPP)
3530 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3531 downloader = downloader.FD_NAME if downloader else None
3533 ext = info_dict.get('ext')
3534 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3535 isinstance(pp, FFmpegVideoConvertorPP)
3536 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3537 ) for pp in self._pps['post_process'])
3539 if not postprocessed_by_ffmpeg:
3540 ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
3541 and info_dict.get('container') == 'm4a_dash',
3542 'writing DASH m4a. Only some players support this container',
3543 FFmpegFixupM4aPP)
3544 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3545 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3546 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3547 FFmpegFixupM3u8PP)
3548 ffmpeg_fixup(downloader == 'dashsegments'
3549 and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
3550 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3552 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3553 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3555 fixup()
3556 try:
3557 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3558 except PostProcessingError as err:
3559 self.report_error(f'Postprocessing: {err}')
3560 return
3561 try:
3562 for ph in self._post_hooks:
3563 ph(info_dict['filepath'])
3564 except Exception as err:
3565 self.report_error(f'post hooks: {err}')
3566 return
3567 info_dict['__write_download_archive'] = True
3569 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3570 if self.params.get('force_write_download_archive'):
3571 info_dict['__write_download_archive'] = True
3572 check_max_downloads()
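# The fixup() helper above is governed by the 'fixup' param. Per the branches
# above: 'never'/'ignore' skip it, 'warn' only reports, 'detect_or_warn' (the
# default) queues an ffmpeg-based fixup postprocessor when the file was
# actually downloaded, and 'force' always attempts it. A sketch (the URL is
# hypothetical):
#
#   with YoutubeDL({'fixup': 'warn'}) as ydl:
#       ydl.download(['https://example.com/watch?v=xxxxxxxx'])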
3574 def __download_wrapper(self, func):
3575 @functools.wraps(func)
3576 def wrapper(*args, **kwargs):
3577 try:
3578 res = func(*args, **kwargs)
3579 except CookieLoadError:
3580 raise
3581 except UnavailableVideoError as e:
3582 self.report_error(e)
3583 except DownloadCancelled as e:
3584 self.to_screen(f'[info] {e}')
3585 if not self.params.get('break_per_url'):
3586 raise
3587 self._num_downloads = 0
3588 else:
3589 if self.params.get('dump_single_json', False):
3590 self.post_extract(res)
3591 self.to_stdout(json.dumps(self.sanitize_info(res)))
3592 return wrapper
3594 def download(self, url_list):
3595 """Download a given list of URLs."""
3596 url_list = variadic(url_list) # Passing a single URL is a common mistake
3597 outtmpl = self.params['outtmpl']['default']
3598 if (len(url_list) > 1
3599 and outtmpl != '-'
3600 and '%' not in outtmpl
3601 and self.params.get('max_downloads') != 1):
3602 raise SameFileError(outtmpl)
3604 for url in url_list:
3605 self.__download_wrapper(self.extract_info)(
3606 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3608 return self._download_retcode
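# Typical embedding of this entry point (a minimal sketch; the URL is
# hypothetical):
#
#   from yt_dlp import YoutubeDL
#
#   with YoutubeDL({'outtmpl': '%(id)s.%(ext)s'}) as ydl:
#       retcode = ydl.download(['https://example.com/watch?v=xxxxxxxx'])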
3610 def download_with_info_file(self, info_filename):
3611 with contextlib.closing(fileinput.FileInput(
3612 [info_filename], mode='r',
3613 openhook=fileinput.hook_encoded('utf-8'))) as f:
3614 # FileInput doesn't have a read method, so we can't call json.load
3615 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3616 for info in variadic(json.loads('\n'.join(f)))]
3617 for info in infos:
3618 try:
3619 self.__download_wrapper(self.process_ie_result)(info, download=True)
3620 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3621 if not isinstance(e, EntryNotInPlaylist):
3622 self.to_stderr('\r')
3623 webpage_url = info.get('webpage_url')
3624 if webpage_url is None:
3625 raise
3626 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3627 self.download([webpage_url])
3628 except ExtractorError as e:
3629 self.report_error(e)
3630 return self._download_retcode
3632 @staticmethod
3633 def sanitize_info(info_dict, remove_private_keys=False):
3634 """ Sanitize the infodict for converting to json """
3635 if info_dict is None:
3636 return info_dict
3637 info_dict.setdefault('epoch', int(time.time()))
3638 info_dict.setdefault('_type', 'video')
3639 info_dict.setdefault('_version', {
3640 'version': __version__,
3641 'current_git_head': current_git_head(),
3642 'release_git_head': RELEASE_GIT_HEAD,
3643 'repository': ORIGIN,
3644 })
3646 if remove_private_keys:
3647 reject = lambda k, v: v is None or k.startswith('__') or k in {
3648 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3649 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3650 'playlist_autonumber',
3651 }
3652 else:
3653 reject = lambda k, v: False
3655 def filter_fn(obj):
3656 if isinstance(obj, dict):
3657 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3658 elif isinstance(obj, (list, tuple, set, LazyList)):
3659 return list(map(filter_fn, obj))
3660 elif obj is None or isinstance(obj, (str, int, float, bool)):
3661 return obj
3662 else:
3663 return repr(obj)
3665 return filter_fn(info_dict)
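# sanitize_info() makes an info dict JSON-serializable. A minimal sketch;
# remove_private_keys=True mirrors --clean-infojson (the URL is hypothetical):
#
#   import json
#   from yt_dlp import YoutubeDL
#
#   with YoutubeDL() as ydl:
#       info = ydl.extract_info('https://example.com/watch?v=xxxxxxxx', download=False)
#       print(json.dumps(YoutubeDL.sanitize_info(info, remove_private_keys=True)))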
3667 @staticmethod
3668 def filter_requested_info(info_dict, actually_filter=True):
3669 """ Alias of sanitize_info for backward compatibility """
3670 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3672 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3673 for filename in set(filter(None, files_to_delete)):
3674 if msg:
3675 self.to_screen(msg % filename)
3676 try:
3677 os.remove(filename)
3678 except OSError:
3679 self.report_warning(f'Unable to delete file {filename}')
3680 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3681 del info['__files_to_move'][filename]
3683 @staticmethod
3684 def post_extract(info_dict):
3685 def actual_post_extract(info_dict):
3686 if info_dict.get('_type') in ('playlist', 'multi_video'):
3687 for video_dict in info_dict.get('entries', {}):
3688 actual_post_extract(video_dict or {})
3689 return
3691 post_extractor = info_dict.pop('__post_extractor', None) or dict
3692 info_dict.update(post_extractor())
3694 actual_post_extract(info_dict or {})
3696 def run_pp(self, pp, infodict):
3697 files_to_delete = []
3698 if '__files_to_move' not in infodict:
3699 infodict['__files_to_move'] = {}
3700 try:
3701 files_to_delete, infodict = pp.run(infodict)
3702 except PostProcessingError as e:
3703 # Must be True and not 'only_download'
3704 if self.params.get('ignoreerrors') is True:
3705 self.report_error(e)
3706 return infodict
3707 raise
3709 if not files_to_delete:
3710 return infodict
3711 if self.params.get('keepvideo', False):
3712 for f in files_to_delete:
3713 infodict['__files_to_move'].setdefault(f, '')
3714 else:
3715 self._delete_downloaded_files(
3716 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3717 return infodict
3719 def run_all_pps(self, key, info, *, additional_pps=None):
3720 if key != 'video':
3721 self._forceprint(key, info)
3722 for pp in (additional_pps or []) + self._pps[key]:
3723 info = self.run_pp(pp, info)
3724 return info
3726 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3727 info = dict(ie_info)
3728 info['__files_to_move'] = files_to_move or {}
3729 try:
3730 info = self.run_all_pps(key, info)
3731 except PostProcessingError as err:
3732 msg = f'Preprocessing: {err}'
3733 info.setdefault('__pending_error', msg)
3734 self.report_error(msg, is_error=False)
3735 return info, info.pop('__files_to_move', None)
3737 def post_process(self, filename, info, files_to_move=None):
3738 """Run all the postprocessors on the given file."""
3739 info['filepath'] = filename
3740 info['__files_to_move'] = files_to_move or {}
3741 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3742 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3743 del info['__files_to_move']
3744 return self.run_all_pps('after_move', info)
3746 def _make_archive_id(self, info_dict):
3747 video_id = info_dict.get('id')
3748 if not video_id:
3749 return
3750 # Future-proof against any change in case
3751 # and for backwards compatibility with prior versions
3752 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3753 if extractor is None:
3754 url = str_or_none(info_dict.get('url'))
3755 if not url:
3756 return
3757 # Try to find matching extractor for the URL and take its ie_key
3758 for ie_key, ie in self._ies.items():
3759 if ie.suitable(url):
3760 extractor = ie_key
3761 break
3762 else:
3763 return
3764 return make_archive_id(extractor, video_id)
3766 def in_download_archive(self, info_dict):
3767 if not self.archive:
3768 return False
3770 vid_ids = [self._make_archive_id(info_dict)]
3771 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3772 return any(id_ in self.archive for id_ in vid_ids)
3774 def record_download_archive(self, info_dict):
3775 fn = self.params.get('download_archive')
3776 if fn is None:
3777 return
3778 vid_id = self._make_archive_id(info_dict)
3779 assert vid_id
3781 self.write_debug(f'Adding to archive: {vid_id}')
3782 if is_path_like(fn):
3783 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3784 archive_file.write(vid_id + '\n')
3785 self.archive.add(vid_id)
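# Archive entries written above are '<extractor key, lowercased> <video id>'
# lines (see make_archive_id), one per line of the --download-archive file,
# e.g. (illustrative):
#
#   youtube dQw4w9WgXcQ
#   vimeo 123456789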
3787 @staticmethod
3788 def format_resolution(format, default='unknown'):
3789 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3790 return 'audio only'
3791 if format.get('resolution') is not None:
3792 return format['resolution']
3793 if format.get('width') and format.get('height'):
3794 return '%dx%d' % (format['width'], format['height'])
3795 elif format.get('height'):
3796 return '{}p'.format(format['height'])
3797 elif format.get('width'):
3798 return '%dx?' % format['width']
3799 return default
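# Illustrative outputs of format_resolution(), following the branches above:
#
#   format_resolution({'width': 1920, 'height': 1080})       -> '1920x1080'
#   format_resolution({'height': 720})                       -> '720p'
#   format_resolution({'width': 640})                        -> '640x?'
#   format_resolution({'vcodec': 'none', 'acodec': 'mp4a'})  -> 'audio only'
#   format_resolution({})                                    -> 'unknown'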
3801 def _list_format_headers(self, *headers):
3802 if self.params.get('listformats_table', True) is not False:
3803 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3804 return headers
3806 def _format_note(self, fdict):
3807 res = ''
3808 if fdict.get('ext') in ['f4f', 'f4m']:
3809 res += '(unsupported)'
3810 if fdict.get('language'):
3811 if res:
3812 res += ' '
3813 res += '[{}]'.format(fdict['language'])
3814 if fdict.get('format_note') is not None:
3815 if res:
3816 res += ' '
3817 res += fdict['format_note']
3818 if fdict.get('tbr') is not None:
3819 if res:
3820 res += ', '
3821 res += '%4dk' % fdict['tbr']
3822 if fdict.get('container') is not None:
3823 if res:
3824 res += ', '
3825 res += '{} container'.format(fdict['container'])
3826 if (fdict.get('vcodec') is not None
3827 and fdict.get('vcodec') != 'none'):
3828 if res:
3829 res += ', '
3830 res += fdict['vcodec']
3831 if fdict.get('vbr') is not None:
3832 res += '@'
3833 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3834 res += 'video@'
3835 if fdict.get('vbr') is not None:
3836 res += '%4dk' % fdict['vbr']
3837 if fdict.get('fps') is not None:
3838 if res:
3839 res += ', '
3840 res += '{}fps'.format(fdict['fps'])
3841 if fdict.get('acodec') is not None:
3842 if res:
3843 res += ', '
3844 if fdict['acodec'] == 'none':
3845 res += 'video only'
3846 else:
3847 res += '%-5s' % fdict['acodec']
3848 elif fdict.get('abr') is not None:
3849 if res:
3850 res += ', '
3851 res += 'audio'
3852 if fdict.get('abr') is not None:
3853 res += '@%3dk' % fdict['abr']
3854 if fdict.get('asr') is not None:
3855 res += ' (%5dHz)' % fdict['asr']
3856 if fdict.get('filesize') is not None:
3857 if res:
3858 res += ', '
3859 res += format_bytes(fdict['filesize'])
3860 elif fdict.get('filesize_approx') is not None:
3861 if res:
3862 res += ', '
3863 res += '~' + format_bytes(fdict['filesize_approx'])
3864 return res
3866 def _get_formats(self, info_dict):
3867 if info_dict.get('formats') is None:
3868 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3869 return [info_dict]
3870 return []
3871 return info_dict['formats']
3873 def render_formats_table(self, info_dict):
3874 formats = self._get_formats(info_dict)
3875 if not formats:
3876 return
3877 if self.params.get('listformats_table', True) is False:
3878 table = [
3879 [
3880 format_field(f, 'format_id'),
3881 format_field(f, 'ext'),
3882 self.format_resolution(f),
3883 self._format_note(f),
3884 ] for f in formats if (f.get('preference') or 0) >= -1000]
3885 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3887 def simplified_codec(f, field):
3888 assert field in ('acodec', 'vcodec')
3889 codec = f.get(field)
3890 if not codec:
3891 return 'unknown'
3892 elif codec != 'none':
3893 return '.'.join(codec.split('.')[:4])
3895 if field == 'vcodec' and f.get('acodec') == 'none':
3896 return 'images'
3897 elif field == 'acodec' and f.get('vcodec') == 'none':
3898 return ''
3899 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3900 self.Styles.SUPPRESS)
3902 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3903 table = [
3904 [
3905 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3906 format_field(f, 'ext'),
3907 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3908 format_field(f, 'fps', '\t%d', func=round),
3909 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3910 format_field(f, 'audio_channels', '\t%s'),
3911 delim, (
3912 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3913 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3914 or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
3915 self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)),
3916 format_field(f, 'tbr', '\t%dk', func=round),
3917 shorten_protocol_name(f.get('protocol', '')),
3918 delim,
3919 simplified_codec(f, 'vcodec'),
3920 format_field(f, 'vbr', '\t%dk', func=round),
3921 simplified_codec(f, 'acodec'),
3922 format_field(f, 'abr', '\t%dk', func=round),
3923 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3924 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3925 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3926 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3927 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3928 format_field(f, 'format_note'),
3929 format_field(f, 'container', ignore=(None, f.get('ext'))),
3930 delim=', '), delim=' '),
3931 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3932 header_line = self._list_format_headers(
3933 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3934 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3936 return render_table(
3937 header_line, table, hide_empty=True,
3938 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3940 def render_thumbnails_table(self, info_dict):
3941 thumbnails = list(info_dict.get('thumbnails') or [])
3942 if not thumbnails:
3943 return None
3944 return render_table(
3945 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3946 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3948 def render_subtitles_table(self, video_id, subtitles):
3949 def _row(lang, formats):
3950 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3951 if len(set(names)) == 1:
3952 names = [] if names[0] == 'unknown' else names[:1]
3953 return [lang, ', '.join(names), ', '.join(exts)]
3955 if not subtitles:
3956 return None
3957 return render_table(
3958 self._list_format_headers('Language', 'Name', 'Formats'),
3959 [_row(lang, formats) for lang, formats in subtitles.items()],
3960 hide_empty=True)
3962 def __list_table(self, video_id, name, func, *args):
3963 table = func(*args)
3964 if not table:
3965 self.to_screen(f'{video_id} has no {name}')
3966 return
3967 self.to_screen(f'[info] Available {name} for {video_id}:')
3968 self.to_stdout(table)
3970 def list_formats(self, info_dict):
3971 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3973 def list_thumbnails(self, info_dict):
3974 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3976 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3977 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3979 def print_debug_header(self):
3980 if not self.params.get('verbose'):
3981 return
3983 from . import _IN_CLI # Must be delayed import
3985 # These imports can be slow. So import them only as needed
3986 from .extractor.extractors import _LAZY_LOADER
3987 from .extractor.extractors import (
3988 _PLUGIN_CLASSES as plugin_ies,
3989 _PLUGIN_OVERRIDES as plugin_ie_overrides,
3990 )
3992 def get_encoding(stream):
3993 ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
3994 additional_info = []
3995 if os.environ.get('TERM', '').lower() == 'dumb':
3996 additional_info.append('dumb')
3997 if not supports_terminal_sequences(stream):
3998 from .utils import WINDOWS_VT_MODE # Must be imported locally
3999 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
4000 if additional_info:
4001 ret = f'{ret} ({",".join(additional_info)})'
4002 return ret
4004 encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
4005 locale.getpreferredencoding(),
4006 sys.getfilesystemencoding(),
4007 self.get_encoding(),
4008 ', '.join(
4009 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
4010 if stream is not None and key != 'console'),
4011 )
4013 logger = self.params.get('logger')
4014 if logger:
4015 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
4016 write_debug(encoding_str)
4017 else:
4018 write_string(f'[debug] {encoding_str}\n', encoding=None)
4019 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
4021 source = detect_variant()
4022 if VARIANT not in (None, 'pip'):
4023 source += '*'
4024 klass = type(self)
4025 write_debug(join_nonempty(
4026 f'{REPOSITORY.rpartition("/")[2]} version',
4027 _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
4028 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
4029 '' if source == 'unknown' else f'({source})',
4030 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
4031 delim=' '))
4033 if not _IN_CLI:
4034 write_debug(f'params: {self.params}')
4036 if not _LAZY_LOADER:
4037 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
4038 write_debug('Lazy loading extractors is forcibly disabled')
4039 else:
4040 write_debug('Lazy loading extractors is disabled')
4041 if self.params['compat_opts']:
4042 write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))
4044 if current_git_head():
4045 write_debug(f'Git HEAD: {current_git_head()}')
4046 write_debug(system_identifier())
4048 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
4049 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
4050 if ffmpeg_features:
4051 exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features)))
4053 exe_versions['rtmpdump'] = rtmpdump_version()
4054 exe_versions['phantomjs'] = PhantomJSwrapper._version()
4055 exe_str = ', '.join(
4056 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
4057 ) or 'none'
4058 write_debug(f'exe versions: {exe_str}')
4060 from .compat.compat_utils import get_package_info
4061 from .dependencies import available_dependencies
4063 write_debug('Optional libraries: %s' % (', '.join(sorted({
4064 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
4065 })) or 'none'))
4067 write_debug(f'Proxy map: {self.proxies}')
4068 write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
4069 if os.environ.get('YTDLP_NO_PLUGINS'):
4070 write_debug('Plugins are forcibly disabled')
4071 return
4073 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
4074 display_list = ['{}{}'.format(
4075 klass.__name__, '' if klass.__name__ == name else f' as {name}')
4076 for name, klass in plugins.items()]
4077 if plugin_type == 'Extractor':
4078 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
4079 for parent, plugins in plugin_ie_overrides.items())
4080 if not display_list:
4081 continue
4082 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
4084 plugin_dirs = plugin_directories()
4085 if plugin_dirs:
4086 write_debug(f'Plugin directories: {plugin_dirs}')
4088 @functools.cached_property
4089 def proxies(self):
4090 """Global proxy configuration"""
4091 opts_proxy = self.params.get('proxy')
4092 if opts_proxy is not None:
4093 if opts_proxy == '':
4094 opts_proxy = '__noproxy__'
4095 proxies = {'all': opts_proxy}
4096 else:
4097 proxies = urllib.request.getproxies()
4098 # compat. Set HTTPS_PROXY to __noproxy__ to revert
4099 if 'http' in proxies and 'https' not in proxies:
4100 proxies['https'] = proxies['http']
4102 return proxies
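# Resolution sketch for the property above: an explicit 'proxy' param wins
# over environment proxies, and an empty string disables proxying entirely
# (values illustrative):
#
#   YoutubeDL({'proxy': 'socks5://127.0.0.1:1080'}).proxies  -> {'all': 'socks5://127.0.0.1:1080'}
#   YoutubeDL({'proxy': ''}).proxies                         -> {'all': '__noproxy__'}
#
# Otherwise urllib.request.getproxies() (HTTP_PROXY etc.) is consulted, with
# 'https' falling back to 'http' for compatibility.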
4104 @functools.cached_property
4105 def cookiejar(self):
4106 """Global cookiejar instance"""
4107 try:
4108 return load_cookies(
4109 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4110 except CookieLoadError as error:
4111 cause = error.__context__
4112 # compat: <=py3.9: `traceback.format_exception` has a different signature
4113 self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)))
4114 raise
4116 @property
4117 def _opener(self):
4118 """
4119 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4120 """
4121 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4122 handler = self._request_director.handlers['Urllib']
4123 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4125 def _get_available_impersonate_targets(self):
4126 # TODO(future): make available as public API
4127 return [
4128 (target, rh.RH_NAME)
4129 for rh in self._request_director.handlers.values()
4130 if isinstance(rh, ImpersonateRequestHandler)
4131 for target in rh.supported_targets
4132 ]
4134 def _impersonate_target_available(self, target):
4135 # TODO(future): make available as public API
4136 return any(
4137 rh.is_supported_target(target)
4138 for rh in self._request_director.handlers.values()
4139 if isinstance(rh, ImpersonateRequestHandler))
4141 def urlopen(self, req):
4142 """ Start an HTTP download """
4143 if isinstance(req, str):
4144 req = Request(req)
4145 elif isinstance(req, urllib.request.Request):
4146 self.deprecation_warning(
4147 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4148 'Use yt_dlp.networking.common.Request instead.')
4149 req = urllib_req_to_req(req)
4150 assert isinstance(req, Request)
4152 # compat: Assume user:pass url params are basic auth
4153 url, basic_auth_header = extract_basic_auth(req.url)
4154 if basic_auth_header:
4155 req.headers['Authorization'] = basic_auth_header
4156 req.url = sanitize_url(url)
4158 clean_proxies(proxies=req.proxies, headers=req.headers)
4159 clean_headers(req.headers)
4161 try:
4162 return self._request_director.send(req)
4163 except NoSupportingHandlers as e:
4164 for ue in e.unsupported_errors:
4165 # FIXME: This depends on the order of errors.
4166 if not (ue.handler and ue.msg):
4167 continue
4168 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4169 raise RequestError(
4170 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4171 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4172 if (
4173 'unsupported proxy type: "https"' in ue.msg.lower()
4174 and 'requests' not in self._request_director.handlers
4175 and 'curl_cffi' not in self._request_director.handlers
4176 ):
4177 raise RequestError(
4178 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')
4180 elif (
4181 re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
4182 and 'websockets' not in self._request_director.handlers
4183 ):
4184 raise RequestError(
4185 'This request requires WebSocket support. '
4186 'Ensure one of the following dependencies are installed: websockets',
4187 cause=ue) from ue
4189 elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()):
4190 raise RequestError(
4191 f'Impersonate target "{req.extensions["impersonate"]}" is not available.'
4192 f' See --list-impersonate-targets for available targets.'
4193 f' This request requires browser impersonation, however you may be missing dependencies'
4194 f' required to support this target.')
4195 raise
4196 except SSLError as e:
4197 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4198 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4199 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4200 raise RequestError(
4201 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4202 'Try using --legacy-server-connect', cause=e) from e
4203 raise
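# urlopen() is the supported way to issue requests that share this instance's
# cookies, proxies and headers. A minimal sketch (the URL is hypothetical):
#
#   from yt_dlp import YoutubeDL
#   from yt_dlp.networking import Request
#
#   with YoutubeDL() as ydl:
#       response = ydl.urlopen(Request('https://example.com/api', headers={'Accept': 'application/json'}))
#       body = response.read()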
4205 def build_request_director(self, handlers, preferences=None):
4206 logger = _YDLLogger(self)
4207 headers = self.params['http_headers'].copy()
4208 proxies = self.proxies.copy()
4209 clean_headers(headers)
4210 clean_proxies(proxies, headers)
4212 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4213 for handler in handlers:
4214 director.add_handler(handler(
4215 logger=logger,
4216 headers=headers,
4217 cookiejar=self.cookiejar,
4218 proxies=proxies,
4219 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4220 verify=not self.params.get('nocheckcertificate'),
4221 **traverse_obj(self.params, {
4222 'verbose': 'debug_printtraffic',
4223 'source_address': 'source_address',
4224 'timeout': 'socket_timeout',
4225 'legacy_ssl_support': 'legacyserverconnect',
4226 'enable_file_urls': 'enable_file_urls',
4227 'impersonate': 'impersonate',
4228 'client_cert': {
4229 'client_certificate': 'client_certificate',
4230 'client_certificate_key': 'client_certificate_key',
4231 'client_certificate_password': 'client_certificate_password',
4232 },
4233 }),
4234 ))
4235 director.preferences.update(preferences or [])
4236 if 'prefer-legacy-http-handler' in self.params['compat_opts']:
4237 director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
4238 return director
4240 @functools.cached_property
4241 def _request_director(self):
4242 return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
4244 def encode(self, s):
4245 if isinstance(s, bytes):
4246 return s # Already encoded
4248 try:
4249 return s.encode(self.get_encoding())
4250 except UnicodeEncodeError as err:
4251 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4252 raise
4254 def get_encoding(self):
4255 encoding = self.params.get('encoding')
4256 if encoding is None:
4257 encoding = preferredencoding()
4258 return encoding
    def _write_info_json(self, label, ie_result, infofn, overwrite=None):
        """ Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error """
        if overwrite is None:
            overwrite = self.params.get('overwrites', True)
        if not self.params.get('writeinfojson'):
            return False
        elif not infofn:
            self.write_debug(f'Skipping writing {label} infojson')
            return False
        elif not self._ensure_dir_exists(infofn):
            return None
        elif not overwrite and os.path.exists(infofn):
            self.to_screen(f'[info] {label.title()} metadata is already present')
            return 'exists'

        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
        try:
            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
            return True
        except OSError:
            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
            return None

    def _write_description(self, label, ie_result, descfn):
        """ Write description and return True = written, False = skipped, None = error """
        if not self.params.get('writedescription'):
            return False
        elif not descfn:
            self.write_debug(f'Skipping writing {label} description')
            return False
        elif not self._ensure_dir_exists(descfn):
            return None
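        # Note: the "already present" branch below does not return early; it falls
        # through to the final `return True`, so a pre-existing description file
        # still counts as successfully written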
        elif not self.params.get('overwrites', True) and os.path.exists(descfn):
            self.to_screen(f'[info] {label.title()} description is already present')
        elif ie_result.get('description') is None:
            self.to_screen(f'[info] There\'s no {label} description to write')
            return False
        else:
            try:
                self.to_screen(f'[info] Writing {label} description to: {descfn}')
                with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(ie_result['description'])
            except OSError:
                self.report_error(f'Cannot write {label} description file {descfn}')
                return None
        return True
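
    # The subtitle/thumbnail writers return (current_path, final_path) pairs:
    # `filepath` records where the file is now, while the final path is where it
    # should end up (the pairs are presumably consumed later when files are moved
    # into place, e.g. by the move-files postprocessor).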
    def _write_subtitles(self, info_dict, filename):
        """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error"""
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # Subtitle download errors are already reported in the relevant IE,
            # so this silently continues when used with an IE without subtitle support
            return ret
        elif not subtitles:
            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None
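
            # No inline subtitle data, so download it like regular media,
            # inheriting the video's HTTP headers when the subtitle entry carries none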
            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret
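
    # Thumbnail writing mirrors the subtitle helper: it returns (current, final)
    # filename pairs, or None when the target directory could not be created.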
    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        if thumbnails and not self._ensure_dir_exists(filename):
            return None
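
        # The thumbnail list appears to be ordered worst-to-best, so iterate in
        # reverse to try the best candidate first; entries that fail to download
        # are popped, and unless all thumbnails were requested, the loop stops
        # after the first success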
        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] {} is already present'.format((
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
            if ret and not write_all:
                break
        return ret