import collections
import contextlib
import copy
import datetime as dt
import errno
import fileinput
import functools
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import urllib  # isort: split
from .compat import urllib_req_to_req
from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    network_exceptions,
)
from .networking.impersonate import ImpersonateRequestHandler
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import (
    REPOSITORY,
    _get_system_deprecation,
    _make_label,
    current_git_head,
    detect_variant,
)
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLError,
    age_restricted,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filesize_from_tbr,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    shell_quote,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _UnsafeExtensionError, _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__

if os.name == 'nt':
    import ctypes


def _catch_unsafe_extension_error(func):
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except _UnsafeExtensionError as error:
            self.report_error(
                f'The extracted extension ({error.extension!r}) is unusual '
                'and will be skipped for safety reasons. '
                f'If you believe this is an error{bug_reports_message(",")}')

    return wrapper


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.
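
    A minimal usage sketch (the URL and option values are illustrative):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])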

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
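                       A sketch of such a list (the key names come from
                       yt_dlp/postprocessor/__init__.py; values are illustrative):
                           'postprocessors': [{'key': 'FFmpegExtractAudio',
                                               'preferredcodec': 'mp3'}]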
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
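                       A sketch of such a hook, using the fields above:
                           def hook(d):
                               if d['status'] == 'finished':
                                   print('Done downloading', d['filename'])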
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    impersonate:       Client to impersonate for requests.
                       An ImpersonateTarget (from yt_dlp.networking.impersonate)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils/_utils.py is one example for this.
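                       A sketch of such a function (the 60s cutoff is illustrative):
                           def longer_than_a_minute(info_dict, *, incomplete):
                               duration = info_dict.get('duration')
                               if duration and duration < 60:
                                   return 'The video is too short'  # ignored
                               return None  # downloaded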
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto',
                       'no_color', 'never', 'auto-tty' or 'no_color-tty'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat,
                       format-sort, no-clean-infojson, no-playlist-metafiles,
                       no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
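                       A sketch of such a callback (the times are illustrative):
                           def download_ranges(info_dict, ydl):
                               return [{'start_time': 10, 'end_time': 20}]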
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads, progress_delta.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true if we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    no_overwrites:     Same as `overwrites=False`
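
    A combined sketch of params (all values are illustrative):

        ydl_opts = {
            'format': 'bestvideo*+bestaudio/best',
            'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
            'paths': {'home': '~/Videos', 'temp': '/tmp'},
            'writethumbnail': True,
            'ignoreerrors': 'only_download',
        }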
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
        'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
        'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
    }

    _deprecated_multivalue_fields = {
        'album_artist': 'album_artists',
        'artist': 'artists',
        'composer': 'composers',
        'creator': 'creators',
        'genre': 'genres',
    }

    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)
        self.__header_cookies = []

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if os.name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
        base_no_color = bool(os.getenv('NO_COLOR'))

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}, any)) or 'auto'
            if policy in ('auto', 'auto-tty', 'no_color-tty'):
                no_color = base_no_color
                if policy.endswith('tty'):
                    no_color = policy.startswith('no_color')
                if term_allow_color and supports_terminal_sequences(stream):
                    return 'no_color' if no_color else True
                return False
            assert policy in ('always', 'never', 'no_color'), policy
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        system_deprecation = _get_system_deprecation()
        if system_deprecation:
            self.deprecated_feature(system_deprecation.replace('\n', '\n                    '))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if impersonate_target := self.params.get('impersonate'):
            if not self._impersonate_target_available(impersonate_target):
                raise YoutubeDLError(
                    f'Impersonate target "{impersonate_target}" is not available. '
                    f'Use --list-impersonate-targets to see available targets. '
                    f'You may be missing dependencies required to support this target.')

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list. If there's no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if os.name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if os.name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        if '_request_director' in self.__dict__:
            self._request_director.close()
            del self._request_director

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise an error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        """Log debug message or print message to stderr"""
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(f'[download] {file_name} has already been downloaded')
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen(f'Deleting existing file {file_name}')
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or (has_drm and 'This video is DRM protected') or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly; that is not what we want, since we need to keep
        # '%%' intact for the template dict substitution step. Work around
        # it with a boundary-like separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        """ Escape any remaining strings like %s, %abc% etc. """
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)
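        # A usage sketch (illustrative): escape_outtmpl('100% %(title)s')
        # returns '100%% %(title)s' -- the lone '%' is doubled while the
        # keyed '%(title)s' is left intact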

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        """ @return None or Exception object """
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
            '*': float.__mul__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}  # noqa: UP031
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {  # noqa: UP031
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
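        # Illustrative template keys this regex parses (shown with their
        # surrounding %(...)X wrappers; see "OUTPUT TEMPLATE" in README.md):
        #   %(title)s                  plain field
        #   %(playlist_index+1)05d     maths on a numeric field
        #   %(epoch>%Y-%m-%d)s         strftime-style formatting
        #   %(id&yt_%s|unknown)s       replacement ('&') and default ('|')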

        def _from_user_input(field):
            if field == ':':
                return ...
            elif ':' in field:
                return slice(*map(int_or_none, field.split(':')))
            elif int_or_none(field) is not None:
                return int(field)
            return field

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    fields[i] = _from_user_input(f)
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()
1334 def create_key(outer_mobj):
1335 if not outer_mobj.group('has_key'):
1336 return outer_mobj.group(0)
1337 key = outer_mobj.group('key')
1338 mobj = re.match(INTERNAL_FORMAT_RE, key)
1339 value, replacement, default, last_field = None, None, na, ''
1340 while mobj:
1341 mobj = mobj.groupdict()
1342 default = mobj['default'] if mobj['default'] is not None else default
1343 value = get_value(mobj)
1344 last_field, replacement = mobj['fields'], mobj['replacement']
1345 if value is None and mobj['alternate']:
1346 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1347 else:
1348 break
1350 if None not in (value, replacement):
1351 try:
1352 value = replacement_formatter.format(replacement, value)
1353 except ValueError:
1354 value, default = None, na
1356 fmt = outer_mobj.group('format')
1357 if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
1358 fmt = f'0{field_size_compat_map[last_field]:d}d'
1360 flags = outer_mobj.group('conversion') or ''
1361 str_fmt = f'{fmt[:-1]}s'
1362 if value is None:
1363 value, fmt = default, 's'
1364 elif fmt[-1] == 'l': # list
1365 delim = '\n' if '#' in flags else ', '
1366 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1367 elif fmt[-1] == 'j': # json
1368 value, fmt = json.dumps(
1369 value, default=_dumpjson_default,
1370 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1371 elif fmt[-1] == 'h': # html
1372 value, fmt = escapeHTML(str(value)), str_fmt
1373 elif fmt[-1] == 'q': # quoted
1374 value = map(str, variadic(value) if '#' in flags else [value])
1375 value, fmt = shell_quote(value, shell=True), str_fmt
1376 elif fmt[-1] == 'B': # bytes
1377 value = f'%{str_fmt}'.encode() % str(value).encode()
1378 value, fmt = value.decode('utf-8', 'ignore'), 's'
1379 elif fmt[-1] == 'U': # unicode normalized
1380 value, fmt = unicodedata.normalize(
1381 # "+" = compatibility equivalence, "#" = NFD
1382 'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1383 value), str_fmt
1384 elif fmt[-1] == 'D': # decimal suffix
1385 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1386 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1387 factor=1024 if '#' in flags else 1000)
1388 elif fmt[-1] == 'S': # filename sanitization
1389 value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
1390 elif fmt[-1] == 'c':
1391 if value:
1392 value = str(value)[0]
1393 else:
1394 fmt = str_fmt
1395 elif fmt[-1] not in 'rsa': # numeric
1396 value = float_or_none(value)
1397 if value is None:
1398 value, fmt = default, 's'
1400 if sanitize:
1401 # If value is an object, sanitize might convert it to a string
1402 # So we convert it to repr first
1403 if fmt[-1] == 'r':
1404 value, fmt = repr(value), str_fmt
1405 elif fmt[-1] == 'a':
1406 value, fmt = ascii(value), str_fmt
1407 if fmt[-1] in 'csra':
1408 value = sanitizer(last_field, value)
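# NUL-escape '%' inside the key so the generated '%({key})' placeholder cannot clash with outtmpl escaping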
1410 key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
1411 TMPL_DICT[key] = value
1412 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1414 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1416 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1417 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1418 return self.escape_outtmpl(outtmpl) % info_dict
1420 @_catch_unsafe_extension_error
1421 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1422 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1423 if outtmpl is None:
1424 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1425 try:
1426 outtmpl = self._outtmpl_expandpath(outtmpl)
1427 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1428 if not filename:
1429 return None
1431 if tmpl_type in ('', 'temp'):
1432 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1433 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1434 filename = replace_extension(filename, ext, final_ext)
1435 elif tmpl_type:
1436 force_ext = OUTTMPL_TYPES[tmpl_type]
1437 if force_ext:
1438 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1440 # https://github.com/blackjack4494/youtube-dlc/issues/85
1441 trim_file_name = self.params.get('trim_file_name', False)
1442 if trim_file_name:
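# Trim only the stem; keep up to two trailing extension parts (e.g. '.info.json')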
1443 no_ext, *ext = filename.rsplit('.', 2)
1444 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1446 return filename
1447 except ValueError as err:
1448 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1449 return None
1451 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1452 """Generate the output filename"""
1453 if outtmpl:
1454 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1455 dir_type = None
1456 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1457 if not filename and dir_type not in ('', 'temp'):
1458 return ''
1460 if warn:
1461 if not self.params.get('paths'):
1462 pass
1463 elif filename == '-':
1464 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1465 elif os.path.isabs(filename):
1466 self.report_warning('--paths is ignored since an absolute path is given in the output template', only_once=True)
1467 if filename == '-' or not filename:
1468 return filename
1470 return self.get_output_path(dir_type, filename)
1472 def _match_entry(self, info_dict, incomplete=False, silent=False):
1473 """Returns None if the file should be downloaded"""
1474 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1475 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1477 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1479 def check_filter():
1480 if _type in ('playlist', 'multi_video'):
1481 return
1482 elif _type in ('url', 'url_transparent') and not try_call(
1483 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1484 return
1486 if 'title' in info_dict:
1487 # This can happen when we're just evaluating the playlist
1488 title = info_dict['title']
1489 matchtitle = self.params.get('matchtitle', False)
1490 if matchtitle:
1491 if not re.search(matchtitle, title, re.IGNORECASE):
1492 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1493 rejecttitle = self.params.get('rejecttitle', False)
1494 if rejecttitle:
1495 if re.search(rejecttitle, title, re.IGNORECASE):
1496 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1498 date = info_dict.get('upload_date')
1499 if date is not None:
1500 date_range = self.params.get('daterange', DateRange())
1501 if date not in date_range:
1502 return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
1503 view_count = info_dict.get('view_count')
1504 if view_count is not None:
1505 min_views = self.params.get('min_views')
1506 if min_views is not None and view_count < min_views:
1507 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1508 max_views = self.params.get('max_views')
1509 if max_views is not None and view_count > max_views:
1510 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1511 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1512 return f'Skipping "{video_title}" because it is age restricted'
1514 match_filter = self.params.get('match_filter')
1515 if match_filter is None:
1516 return None
1518 cancelled = None
1519 try:
1520 try:
1521 ret = match_filter(info_dict, incomplete=incomplete)
1522 except TypeError:
1523 # For backward compatibility
1524 ret = None if incomplete else match_filter(info_dict)
1525 except DownloadCancelled as err:
1526 if err.msg is not NO_DEFAULT:
1527 raise
1528 ret, cancelled = err.msg, err
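# NO_DEFAULT from match_filter means no decision was made; ask the user interactively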
1530 if ret is NO_DEFAULT:
1531 while True:
1532 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1533 reply = input(self._format_screen(
1534 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1535 if reply in {'y', ''}:
1536 return None
1537 elif reply == 'n':
1538 if cancelled:
1539 raise type(cancelled)(f'Skipping {video_title}')
1540 return f'Skipping {video_title}'
1541 return ret
1543 if self.in_download_archive(info_dict):
1544 reason = ''.join((
1545 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1546 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1547 'has already been recorded in the archive'))
1548 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1549 else:
1550 try:
1551 reason = check_filter()
1552 except DownloadCancelled as e:
1553 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1554 else:
1555 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1556 if reason is not None:
1557 if not silent:
1558 self.to_screen('[download] ' + reason)
1559 if self.params.get(break_opt, False):
1560 raise break_err()
1561 return reason
1563 @staticmethod
1564 def add_extra_info(info_dict, extra_info):
1565 """Set the keys from extra_info in info dict if they are missing"""
1566 for key, value in extra_info.items():
1567 info_dict.setdefault(key, value)
1569 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1570 process=True, force_generic_extractor=False):
1571 """
1572 Extract and return the information dictionary of the URL
1574 Arguments:
1575 @param url URL to extract
1577 Keyword arguments:
1578 @param download Whether to download videos
1579 @param process Whether to resolve all unresolved references (URLs, playlist items).
1580 Must be True for download to work
1581 @param ie_key Use only the extractor with this key
1583 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1584 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1585 """
1587 if extra_info is None:
1588 extra_info = {}
1590 if not ie_key and force_generic_extractor:
1591 ie_key = 'Generic'
1593 if ie_key:
1594 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1595 else:
1596 ies = self._ies
1598 for key, ie in ies.items():
1599 if not ie.suitable(url):
1600 continue
1602 if not ie.working():
1603 self.report_warning('The program functionality for this site has been marked as broken, '
1604 'and will probably not work.')
1606 temp_id = ie.get_temp_id(url)
1607 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1608 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1609 'has already been recorded in the archive')
1610 if self.params.get('break_on_existing', False):
1611 raise ExistingVideoReached
1612 break
1613 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1614 else:
1615 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1616 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1617 tb=False if extractors_restricted else None)
1619 def _handle_extraction_exceptions(func):
1620 @functools.wraps(func)
1621 def wrapper(self, *args, **kwargs):
1622 while True:
1623 try:
1624 return func(self, *args, **kwargs)
1625 except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1626 raise
1627 except ReExtractInfo as e:
1628 if e.expected:
1629 self.to_screen(f'{e}; Re-extracting data')
1630 else:
1631 self.to_stderr('\r')
1632 self.report_warning(f'{e}; Re-extracting data')
1633 continue
1634 except GeoRestrictedError as e:
1635 msg = e.msg
1636 if e.countries:
1637 msg += '\nThis video is available in {}.'.format(', '.join(
1638 map(ISO3166Utils.short2full, e.countries)))
1639 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1640 self.report_error(msg)
1641 except ExtractorError as e: # An error we somewhat expected
1642 self.report_error(str(e), e.format_traceback())
1643 except Exception as e:
1644 if self.params.get('ignoreerrors'):
1645 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1646 else:
1647 raise
1648 break
1649 return wrapper
1651 def _wait_for_video(self, ie_result={}):
1652 if (not self.params.get('wait_for_video')
1653 or ie_result.get('_type', 'video') != 'video'
1654 or ie_result.get('formats') or ie_result.get('url')):
1655 return
1657 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1658 last_msg = ''
1660 def progress(msg):
1661 nonlocal last_msg
1662 full_msg = f'{msg}\n'
1663 if not self.params.get('noprogress'):
1664 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1665 elif last_msg:
1666 return
1667 self.to_screen(full_msg, skip_eol=True)
1668 last_msg = msg
1670 min_wait, max_wait = self.params.get('wait_for_video')
1671 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1672 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1673 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1674 self.report_warning('Release time of video is not known')
1675 elif ie_result and (diff or 0) <= 0:
1676 self.report_warning('Video should already be available according to extracted info')
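# Clamp the wait time to the [min_wait, max_wait] range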
1677 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1678 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1680 wait_till = time.time() + diff
1681 try:
1682 while True:
1683 diff = wait_till - time.time()
1684 if diff <= 0:
1685 progress('')
1686 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1687 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1688 time.sleep(1)
1689 except KeyboardInterrupt:
1690 progress('')
1691 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1692 except BaseException as e:
1693 if not isinstance(e, ReExtractInfo):
1694 self.to_screen('')
1695 raise
1697 def _load_cookies(self, data, *, autoscope=True):
1698 """Loads cookies from a `Cookie` header
1700 This tries to work around the security vulnerability of passing cookies to every domain.
1701 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1703 @param data The Cookie header as string to load the cookies from
1704 @param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
1705 If `True`, save cookies for later to be stored in the jar with a limited scope
1706 If a URL, save cookies in the jar with the domain of the URL
1707 """
1708 for cookie in LenientSimpleCookie(data).values():
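# A Cookie header must not carry attributes (domain, path, ...); their presence indicates Set-Cookie syntax (requires autoscope=False)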
1709 if autoscope and any(cookie.values()):
1710 raise ValueError('Invalid syntax in Cookie Header')
1712 domain = cookie.get('domain') or ''
1713 expiry = cookie.get('expires')
1714 if expiry == '': # 0 is valid
1715 expiry = None
1716 prepared_cookie = http.cookiejar.Cookie(
1717 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1718 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1719 cookie.get('secure') or False, expiry, False, None, None, {})
1721 if domain:
1722 self.cookiejar.set_cookie(prepared_cookie)
1723 elif autoscope is True:
1724 self.deprecated_feature(
1725 'Passing cookies as a header is a potential security risk; '
1726 'they will be scoped to the domain of the downloaded URLs. '
1727 'Please consider loading cookies from a file or browser instead.')
1728 self.__header_cookies.append(prepared_cookie)
1729 elif autoscope:
1730 self.report_warning(
1731 'The extractor result contains an unscoped cookie as an HTTP header. '
1732 f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1733 only_once=True)
1734 self._apply_header_cookies(autoscope, [prepared_cookie])
1735 else:
1736 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1737 tb=False, is_error=False)
1739 def _apply_header_cookies(self, url, cookies=None):
1740 """Applies stray header cookies to the provided url
1742 This loads header cookies and scopes them to the domain provided in `url`.
1743 While this is not ideal, it helps reduce the risk of them being sent
1744 to an unintended destination while mostly maintaining compatibility.
1745 """
1746 parsed = urllib.parse.urlparse(url)
1747 if not parsed.hostname:
1748 return
1750 for cookie in map(copy.copy, cookies or self.__header_cookies):
1751 cookie.domain = f'.{parsed.hostname}'
1752 self.cookiejar.set_cookie(cookie)
1754 @_handle_extraction_exceptions
1755 def __extract_info(self, url, ie, download, extra_info, process):
1756 self._apply_header_cookies(url)
1758 try:
1759 ie_result = ie.extract(url)
1760 except UserNotLive as e:
1761 if process:
1762 if self.params.get('wait_for_video'):
1763 self.report_warning(e)
1764 self._wait_for_video()
1765 raise
1766 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1767 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1768 return
1769 if isinstance(ie_result, list):
1770 # Backwards compatibility: old IE result format
1771 ie_result = {
1772 '_type': 'compat_list',
1773 'entries': ie_result,
1774 }
1775 if extra_info.get('original_url'):
1776 ie_result.setdefault('original_url', extra_info['original_url'])
1777 self.add_default_extra_info(ie_result, ie, url)
1778 if process:
1779 self._wait_for_video(ie_result)
1780 return self.process_ie_result(ie_result, download, extra_info)
1781 else:
1782 return ie_result
1784 def add_default_extra_info(self, ie_result, ie, url):
1785 if url is not None:
1786 self.add_extra_info(ie_result, {
1787 'webpage_url': url,
1788 'original_url': url,
1789 })
1790 webpage_url = ie_result.get('webpage_url')
1791 if webpage_url:
1792 self.add_extra_info(ie_result, {
1793 'webpage_url_basename': url_basename(webpage_url),
1794 'webpage_url_domain': get_domain(webpage_url),
1795 })
1796 if ie is not None:
1797 self.add_extra_info(ie_result, {
1798 'extractor': ie.IE_NAME,
1799 'extractor_key': ie.ie_key(),
1800 })
1802 def process_ie_result(self, ie_result, download=True, extra_info=None):
1803 """
1804 Take the result of the ie (may be modified) and resolve all unresolved
1805 references (URLs, playlist items).
1807 It will also download the videos if 'download' is True.
1808 Returns the resolved ie_result.
1809 """
1810 if extra_info is None:
1811 extra_info = {}
1812 result_type = ie_result.get('_type', 'video')
1814 if result_type in ('url', 'url_transparent'):
1815 ie_result['url'] = sanitize_url(
1816 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1817 if ie_result.get('original_url') and not extra_info.get('original_url'):
1818 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1820 extract_flat = self.params.get('extract_flat', False)
1821 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1822 or extract_flat is True):
1823 info_copy = ie_result.copy()
1824 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1825 if ie and not ie_result.get('id'):
1826 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1827 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1828 self.add_extra_info(info_copy, extra_info)
1829 info_copy, _ = self.pre_process(info_copy)
1830 self._fill_common_fields(info_copy, False)
1831 self.__forced_printings(info_copy)
1832 self._raise_pending_errors(info_copy)
1833 if self.params.get('force_write_download_archive', False):
1834 self.record_download_archive(info_copy)
1835 return ie_result
1837 if result_type == 'video':
1838 self.add_extra_info(ie_result, extra_info)
1839 ie_result = self.process_video_result(ie_result, download=download)
1840 self._raise_pending_errors(ie_result)
1841 additional_urls = (ie_result or {}).get('additional_urls')
1842 if additional_urls:
1843 # TODO: Improve MetadataParserPP to allow setting a list
1844 if isinstance(additional_urls, str):
1845 additional_urls = [additional_urls]
1846 self.to_screen(
1847 '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
1848 self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
1849 ie_result['additional_entries'] = [
1850 self.extract_info(
1851 url, download, extra_info=extra_info,
1852 force_generic_extractor=self.params.get('force_generic_extractor'))
1853 for url in additional_urls
1854 ]
1855 return ie_result
1856 elif result_type == 'url':
1857 # We have to add extra_info to the results because it may be
1858 # contained in a playlist
1859 return self.extract_info(
1860 ie_result['url'], download,
1861 ie_key=ie_result.get('ie_key'),
1862 extra_info=extra_info)
1863 elif result_type == 'url_transparent':
1864 # Use the information from the embedding page
1865 info = self.extract_info(
1866 ie_result['url'], ie_key=ie_result.get('ie_key'),
1867 extra_info=extra_info, download=False, process=False)
1869 # extract_info may return None when ignoreerrors is enabled and
1870 # extraction failed with an error, don't crash and return early
1871 # in this case
1872 if not info:
1873 return info
1875 exempted_fields = {'_type', 'url', 'ie_key'}
1876 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1877 # For video clips, the id etc of the clip extractor should be used
1878 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1880 new_result = info.copy()
1881 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1883 # Extracted info may not be a video result (i.e.
1884 # info.get('_type', 'video') != 'video') but rather a url or
1885 # url_transparent. In such cases, outer metadata (from ie_result)
1886 # should be propagated to the inner one (info). For this to happen,
1887 # _type of info should be overridden with url_transparent. This
1888 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1889 if new_result.get('_type') == 'url':
1890 new_result['_type'] = 'url_transparent'
1892 return self.process_ie_result(
1893 new_result, download=download, extra_info=extra_info)
1894 elif result_type in ('playlist', 'multi_video'):
1895 # Protect from infinite recursion due to recursively nested playlists
1896 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1897 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1898 if webpage_url and webpage_url in self._playlist_urls:
1899 self.to_screen(
1900 '[download] Skipping already downloaded playlist: {}'.format(
1901 ie_result.get('title') or ie_result.get('id')))
1902 return
1904 self._playlist_level += 1
1905 self._playlist_urls.add(webpage_url)
1906 self._fill_common_fields(ie_result, False)
1907 self._sanitize_thumbnails(ie_result)
1908 try:
1909 return self.__process_playlist(ie_result, download)
1910 finally:
1911 self._playlist_level -= 1
1912 if not self._playlist_level:
1913 self._playlist_urls.clear()
1914 elif result_type == 'compat_list':
1915 self.report_warning(
1916 'Extractor {} returned a compat_list result. '
1917 'It needs to be updated.'.format(ie_result.get('extractor')))
1919 def _fixup(r):
1920 self.add_extra_info(r, {
1921 'extractor': ie_result['extractor'],
1922 'webpage_url': ie_result['webpage_url'],
1923 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1924 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1925 'extractor_key': ie_result['extractor_key'],
1926 })
1927 return r
1928 ie_result['entries'] = [
1929 self.process_ie_result(_fixup(r), download, extra_info)
1930 for r in ie_result['entries']
1931 ]
1932 return ie_result
1933 else:
1934 raise Exception(f'Invalid result type: {result_type}')
1936 def _ensure_dir_exists(self, path):
1937 return make_dir(path, self.report_error)
1939 @staticmethod
1940 def _playlist_infodict(ie_result, strict=False, **kwargs):
1941 info = {
1942 'playlist_count': ie_result.get('playlist_count'),
1943 'playlist': ie_result.get('title') or ie_result.get('id'),
1944 'playlist_id': ie_result.get('id'),
1945 'playlist_title': ie_result.get('title'),
1946 'playlist_uploader': ie_result.get('uploader'),
1947 'playlist_uploader_id': ie_result.get('uploader_id'),
1948 'playlist_channel': ie_result.get('channel'),
1949 'playlist_channel_id': ie_result.get('channel_id'),
1950 **kwargs,
1951 }
1952 if strict:
1953 return info
1954 if ie_result.get('webpage_url'):
1955 info.update({
1956 'webpage_url': ie_result['webpage_url'],
1957 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1958 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1959 })
1960 return {
1961 **info,
1962 'playlist_index': 0,
1963 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1964 'extractor': ie_result['extractor'],
1965 'extractor_key': ie_result['extractor_key'],
1966 }
1968 def __process_playlist(self, ie_result, download):
1969 """Process each entry in the playlist"""
1970 assert ie_result['_type'] in ('playlist', 'multi_video')
1972 common_info = self._playlist_infodict(ie_result, strict=True)
1973 title = common_info.get('playlist') or '<Untitled>'
1974 if self._match_entry(common_info, incomplete=True) is not None:
1975 return
1976 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1978 all_entries = PlaylistEntries(self, ie_result)
1979 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1981 lazy = self.params.get('lazy_playlist')
1982 if lazy:
1983 resolved_entries, n_entries = [], 'N/A'
1984 ie_result['requested_entries'], ie_result['entries'] = None, None
1985 else:
1986 entries = resolved_entries = list(entries)
1987 n_entries = len(resolved_entries)
1988 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1989 if not ie_result.get('playlist_count'):
1990 # Better to do this after potentially exhausting entries
1991 ie_result['playlist_count'] = all_entries.get_full_count()
1993 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1994 ie_copy = collections.ChainMap(ie_result, extra)
1996 _infojson_written = False
1997 write_playlist_files = self.params.get('allow_playlist_files', True)
1998 if write_playlist_files and self.params.get('list_thumbnails'):
1999 self.list_thumbnails(ie_result)
2000 if write_playlist_files and not self.params.get('simulate'):
2001 _infojson_written = self._write_info_json(
2002 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
2003 if _infojson_written is None:
2004 return
2005 if self._write_description('playlist', ie_result,
2006 self.prepare_filename(ie_copy, 'pl_description')) is None:
2007 return
2008 # TODO: This should be passed to ThumbnailsConvertor if necessary
2009 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
2011 if lazy:
2012 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
2013 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
2014 elif self.params.get('playlistreverse'):
2015 entries.reverse()
2016 elif self.params.get('playlistrandom'):
2017 random.shuffle(entries)
2019 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
2020 f'{format_field(ie_result, "playlist_count", " of %s")}')
2022 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
2023 if self.params.get('extract_flat') == 'discard_in_playlist':
2024 keep_resolved_entries = ie_result['_type'] != 'playlist'
2025 if keep_resolved_entries:
2026 self.write_debug('The information of all playlist entries will be held in memory')
2028 failures = 0
2029 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
2030 for i, (playlist_index, entry) in enumerate(entries):
2031 if lazy:
2032 resolved_entries.append((playlist_index, entry))
2033 if not entry:
2034 continue
2036 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
2037 if not lazy and 'playlist-index' in self.params['compat_opts']:
2038 playlist_index = ie_result['requested_entries'][i]
2040 entry_copy = collections.ChainMap(entry, {
2041 **common_info,
2042 'n_entries': int_or_none(n_entries),
2043 'playlist_index': playlist_index,
2044 'playlist_autonumber': i + 1,
2045 })
2047 if self._match_entry(entry_copy, incomplete=True) is not None:
2048 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
2049 resolved_entries[i] = (playlist_index, NO_DEFAULT)
2050 continue
2052 self.to_screen(
2053 f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
2054 f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
2056 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2057 'playlist_index': playlist_index,
2058 'playlist_autonumber': i + 1,
2059 }, extra))
2060 if not entry_result:
2061 failures += 1
2062 if failures >= max_failures:
2063 self.report_error(
2064 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2065 break
2066 if keep_resolved_entries:
2067 resolved_entries[i] = (playlist_index, entry_result)
2069 # Update with processed data
2070 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2071 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2072 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2073 # Do not set for full playlist
2074 ie_result.pop('requested_entries')
2076 # Write the updated info to json
2077 if _infojson_written is True and self._write_info_json(
2078 'updated playlist', ie_result,
2079 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2080 return
2082 ie_result = self.run_all_pps('playlist', ie_result)
2083 self.to_screen(f'[download] Finished downloading playlist: {title}')
2084 return ie_result
2086 @_handle_extraction_exceptions
2087 def __process_iterable_entry(self, entry, download, extra_info):
2088 return self.process_ie_result(
2089 entry, download=download, extra_info=extra_info)
2091 def _build_format_filter(self, filter_spec):
2092 " Returns a function to filter the formats according to the filter_spec "
2094 OPERATORS = {
2095 '<': operator.lt,
2096 '<=': operator.le,
2097 '>': operator.gt,
2098 '>=': operator.ge,
2099 '=': operator.eq,
2100 '!=': operator.ne,
2101 }
2102 operator_rex = re.compile(r'''(?x)\s*
2103 (?P<key>[\w.-]+)\s*
2104 (?P<op>{})(?P<none_inclusive>\s*\?)?\s*
2105 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2106 '''.format('|'.join(map(re.escape, OPERATORS.keys()))))
2107 m = operator_rex.fullmatch(filter_spec)
2108 if m:
2109 try:
2110 comparison_value = int(m.group('value'))
2111 except ValueError:
2112 comparison_value = parse_filesize(m.group('value'))
2113 if comparison_value is None:
2114 comparison_value = parse_filesize(m.group('value') + 'B')
2115 if comparison_value is None:
2116 raise ValueError(
2117 'Invalid value {!r} in format specification {!r}'.format(
2118 m.group('value'), filter_spec))
2119 op = OPERATORS[m.group('op')]
2121 if not m:
2122 STR_OPERATORS = {
2123 '=': operator.eq,
2124 '^=': lambda attr, value: attr.startswith(value),
2125 '$=': lambda attr, value: attr.endswith(value),
2126 '*=': lambda attr, value: value in attr,
2127 '~=': lambda attr, value: value.search(attr) is not None,
2128 }
2129 str_operator_rex = re.compile(r'''(?x)\s*
2130 (?P<key>[a-zA-Z0-9._-]+)\s*
2131 (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
2132 (?P<quote>["'])?
2133 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2134 (?(quote)(?P=quote))\s*
2135 '''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))
2136 m = str_operator_rex.fullmatch(filter_spec)
2137 if m:
2138 if m.group('op') == '~=':
2139 comparison_value = re.compile(m.group('value'))
2140 else:
2141 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2142 str_op = STR_OPERATORS[m.group('op')]
2143 if m.group('negation'):
2144 op = lambda attr, value: not str_op(attr, value)
2145 else:
2146 op = str_op
2148 if not m:
2149 raise SyntaxError(f'Invalid filter specification {filter_spec!r}')
2151 def _filter(f):
2152 actual_value = f.get(m.group('key'))
2153 if actual_value is None:
2154 return m.group('none_inclusive')
2155 return op(actual_value, comparison_value)
2156 return _filter
2158 def _check_formats(self, formats):
2159 for f in formats:
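# '__working' caches the result of a previous download test for this format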
2160 working = f.get('__working')
2161 if working is not None:
2162 if working:
2163 yield f
2164 continue
2165 self.to_screen('[info] Testing format {}'.format(f['format_id']))
2166 path = self.get_output_path('temp')
2167 if not self._ensure_dir_exists(f'{path}/'):
2168 continue
2169 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2170 temp_file.close()
2171 try:
2172 success, _ = self.dl(temp_file.name, f, test=True)
2173 except (DownloadError, OSError, ValueError, *network_exceptions):
2174 success = False
2175 finally:
2176 if os.path.exists(temp_file.name):
2177 try:
2178 os.remove(temp_file.name)
2179 except OSError:
2180 self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
2181 f['__working'] = success
2182 if success:
2183 yield f
2184 else:
2185 self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
2187 def _select_formats(self, formats, selector):
2188 return list(selector({
2189 'formats': formats,
2190 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2191 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2192 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2193 }))
2195 def _default_format_spec(self, info_dict):
2196 prefer_best = (
2197 self.params['outtmpl']['default'] == '-'
2198 or info_dict.get('is_live') and not self.params.get('live_from_start'))
2200 def can_merge():
2201 merger = FFmpegMergerPP(self)
2202 return merger.available and merger.can_merge()
2204 if not prefer_best and not can_merge():
2205 prefer_best = True
2206 formats = self._get_formats(info_dict)
2207 evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
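# Warn only if the inability to merge actually changes which formats are selected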
2208 if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
2209 self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
2210 'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
2212 compat = (self.params.get('allow_multiple_audio_streams')
2213 or 'format-spec' in self.params['compat_opts'])
2215 return ('best/bestvideo+bestaudio' if prefer_best
2216 else 'bestvideo+bestaudio/best' if compat
2217 else 'bestvideo*+bestaudio/best')
2219 def build_format_selector(self, format_spec):
2220 def syntax_error(note, start):
2221 message = (
2222 'Invalid format specification: '
2223 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2224 return SyntaxError(message)
2226 PICKFIRST = 'PICKFIRST'
2227 MERGE = 'MERGE'
2228 SINGLE = 'SINGLE'
2229 GROUP = 'GROUP'
2230 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2232 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2233 'video': self.params.get('allow_multiple_video_streams', False)}
2235 def _parse_filter(tokens):
2236 filter_parts = []
2237 for type_, string_, _start, _, _ in tokens:
2238 if type_ == tokenize.OP and string_ == ']':
2239 return ''.join(filter_parts)
2240 else:
2241 filter_parts.append(string_)
2243 def _remove_unused_ops(tokens):
2244 # Remove operators that we don't use and join them with the surrounding strings.
2245 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2246 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2247 last_string, last_start, last_end, last_line = None, None, None, None
2248 for type_, string_, start, end, line in tokens:
2249 if type_ == tokenize.OP and string_ == '[':
2250 if last_string:
2251 yield tokenize.NAME, last_string, last_start, last_end, last_line
2252 last_string = None
2253 yield type_, string_, start, end, line
2254 # everything inside brackets will be handled by _parse_filter
2255 for type_, string_, start, end, line in tokens:
2256 yield type_, string_, start, end, line
2257 if type_ == tokenize.OP and string_ == ']':
2258 break
2259 elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
2260 if last_string:
2261 yield tokenize.NAME, last_string, last_start, last_end, last_line
2262 last_string = None
2263 yield type_, string_, start, end, line
2264 elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2265 if not last_string:
2266 last_string = string_
2267 last_start = start
2268 last_end = end
2269 else:
2270 last_string += string_
2271 if last_string:
2272 yield tokenize.NAME, last_string, last_start, last_end, last_line
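# Selector grammar: ',' downloads all listed selectors, '/' picks the first that yields formats, '+' merges two selectors, '(...)' groups, '[...]' attaches filters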
2274 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2275 selectors = []
2276 current_selector = None
2277 for type_, string_, start, _, _ in tokens:
2278 # tokenize emits an ENCODING token first; skip it (it carries no format-spec content)
2279 if type_ == getattr(tokenize, 'ENCODING', None):
2280 continue
2281 elif type_ in [tokenize.NAME, tokenize.NUMBER]:
2282 current_selector = FormatSelector(SINGLE, string_, [])
2283 elif type_ == tokenize.OP:
2284 if string_ == ')':
2285 if not inside_group:
2286 # ')' will be handled by the parentheses group
2287 tokens.restore_last_token()
2288 break
2289 elif inside_merge and string_ in ['/', ',']:
2290 tokens.restore_last_token()
2291 break
2292 elif inside_choice and string_ == ',':
2293 tokens.restore_last_token()
2294 break
2295 elif string_ == ',':
2296 if not current_selector:
2297 raise syntax_error('"," must follow a format selector', start)
2298 selectors.append(current_selector)
2299 current_selector = None
2300 elif string_ == '/':
2301 if not current_selector:
2302 raise syntax_error('"/" must follow a format selector', start)
2303 first_choice = current_selector
2304 second_choice = _parse_format_selection(tokens, inside_choice=True)
2305 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2306 elif string_ == '[':
2307 if not current_selector:
2308 current_selector = FormatSelector(SINGLE, 'best', [])
2309 format_filter = _parse_filter(tokens)
2310 current_selector.filters.append(format_filter)
2311 elif string_ == '(':
2312 if current_selector:
2313 raise syntax_error('Unexpected "("', start)
2314 group = _parse_format_selection(tokens, inside_group=True)
2315 current_selector = FormatSelector(GROUP, group, [])
2316 elif string_ == '+':
2317 if not current_selector:
2318 raise syntax_error('Unexpected "+"', start)
2319 selector_1 = current_selector
2320 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2321 if not selector_2:
2322 raise syntax_error('Expected a selector', start)
2323 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2324 else:
2325 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2326 elif type_ == tokenize.ENDMARKER:
2327 break
2328 if current_selector:
2329 selectors.append(current_selector)
2330 return selectors
2332 def _merge(formats_pair):
2333 format_1, format_2 = formats_pair
2335 formats_info = []
2336 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2337 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2339 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2340 get_no_more = {'video': False, 'audio': False}
2341 for (i, fmt_info) in enumerate(formats_info):
2342 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2343 formats_info.pop(i)
2344 continue
2345 for aud_vid in ['audio', 'video']:
2346 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2347 if get_no_more[aud_vid]:
2348 formats_info.pop(i)
2349 break
2350 get_no_more[aud_vid] = True
2352 if len(formats_info) == 1:
2353 return formats_info[0]
2355 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2356 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2358 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2359 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2361 output_ext = get_compatible_ext(
2362 vcodecs=[f.get('vcodec') for f in video_fmts],
2363 acodecs=[f.get('acodec') for f in audio_fmts],
2364 vexts=[f['ext'] for f in video_fmts],
2365 aexts=[f['ext'] for f in audio_fmts],
2366 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2367 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2369 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2371 new_dict = {
2372 'requested_formats': formats_info,
2373 'format': '+'.join(filtered('format')),
2374 'format_id': '+'.join(filtered('format_id')),
2375 'ext': output_ext,
2376 'protocol': '+'.join(map(determine_protocol, formats_info)),
2377 'language': '+'.join(orderedSet(filtered('language'))) or None,
2378 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2379 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2380 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2381 }
2383 if the_only_video:
2384 new_dict.update({
2385 'width': the_only_video.get('width'),
2386 'height': the_only_video.get('height'),
2387 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2388 'fps': the_only_video.get('fps'),
2389 'dynamic_range': the_only_video.get('dynamic_range'),
2390 'vcodec': the_only_video.get('vcodec'),
2391 'vbr': the_only_video.get('vbr'),
2392 'stretched_ratio': the_only_video.get('stretched_ratio'),
2393 'aspect_ratio': the_only_video.get('aspect_ratio'),
2394 })
2396 if the_only_audio:
2397 new_dict.update({
2398 'acodec': the_only_audio.get('acodec'),
2399 'abr': the_only_audio.get('abr'),
2400 'asr': the_only_audio.get('asr'),
2401 'audio_channels': the_only_audio.get('audio_channels'),
2402 })
2404 return new_dict
2406 def _check_formats(formats):
2407 if self.params.get('check_formats') == 'selected':
2408 yield from self._check_formats(formats)
2409 return
2410 elif (self.params.get('check_formats') is not None
2411 or self.params.get('allow_unplayable_formats')):
2412 yield from formats
2413 return
2415 for f in formats:
2416 if f.get('has_drm') or f.get('__needs_testing'):
2417 yield from self._check_formats([f])
2418 else:
2419 yield f
2421 def _build_selector_function(selector):
2422 if isinstance(selector, list): # ,
2423 fs = [_build_selector_function(s) for s in selector]
2425 def selector_function(ctx):
2426 for f in fs:
2427 yield from f(ctx)
2428 return selector_function
2430 elif selector.type == GROUP: # ()
2431 selector_function = _build_selector_function(selector.selector)
2433 elif selector.type == PICKFIRST: # /
2434 fs = [_build_selector_function(s) for s in selector.selector]
2436 def selector_function(ctx):
2437 for f in fs:
2438 picked_formats = list(f(ctx))
2439 if picked_formats:
2440 return picked_formats
2441 return []
2443 elif selector.type == MERGE: # +
2444 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2446 def selector_function(ctx):
2447 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2448 yield _merge(pair)
2450 elif selector.type == SINGLE: # atom
2451 format_spec = selector.selector or 'best'
2453 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2454 if format_spec == 'all':
2455 def selector_function(ctx):
2456 yield from _check_formats(ctx['formats'][::-1])
2457 elif format_spec == 'mergeall':
2458 def selector_function(ctx):
2459 formats = list(_check_formats(
2460 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2461 if not formats:
2462 return
2463 merged_format = formats[-1]
2464 for f in formats[-2::-1]:
2465 merged_format = _merge((merged_format, f))
2466 yield merged_format
2468 else:
2469 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2470 mobj = re.match(
2471 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2472 format_spec)
2473 if mobj is not None:
2474 format_idx = int_or_none(mobj.group('n'), default=1)
2475 format_reverse = mobj.group('bw')[0] == 'b'
2476 format_type = (mobj.group('type') or [None])[0]
2477 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2478 format_modified = mobj.group('mod') is not None
2480 format_fallback = not format_type and not format_modified # for b, w
2481 _filter_f = (
2482 (lambda f: f.get(f'{format_type}codec') != 'none')
2483 if format_type and format_modified # bv*, ba*, wv*, wa*
2484 else (lambda f: f.get(f'{not_format_type}codec') == 'none')
2485 if format_type # bv, ba, wv, wa
2486 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2487 if not format_modified # b, w
2488 else lambda f: True) # b*, w*
2489 filter_f = lambda f: _filter_f(f) and (
2490 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2491 else:
2492 if format_spec in self._format_selection_exts['audio']:
2493 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2494 elif format_spec in self._format_selection_exts['video']:
2495 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2496 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2497 elif format_spec in self._format_selection_exts['storyboards']:
2498 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2499 else:
2500 filter_f = lambda f: f.get('format_id') == format_spec # id
2502 def selector_function(ctx):
2503 formats = list(ctx['formats'])
2504 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2505 if not matches:
2506 if format_fallback and ctx['incomplete_formats']:
2507 # for extractors with incomplete formats (audio only (soundcloud)
2508 # or video only (imgur)), best/worst will fall back to the
2509 # best/worst {video,audio}-only format
2510 matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
2511 elif separate_fallback and not ctx['has_merged_format']:
2512 # for compatibility with youtube-dl when there is no pre-merged format
2513 matches = list(filter(separate_fallback, formats))
2514 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2515 try:
2516 yield matches[format_idx - 1]
2517 except LazyList.IndexError:
2518 return
2520 filters = [self._build_format_filter(f) for f in selector.filters]
2522 def final_selector(ctx):
2523 ctx_copy = dict(ctx)
2524 for _filter in filters:
2525 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2526 return selector_function(ctx_copy)
2527 return final_selector
2529 # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
2530 # Prefix numbers with random letters to avoid them being classified as numbers
2531 # See: https://github.com/yt-dlp/yt-dlp/pull/8797
2532 # TODO: Implement parser not reliant on tokenize.tokenize
2533 prefix = ''.join(random.choices(string.ascii_letters, k=32))
2534 stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
2535 try:
2536 tokens = list(_remove_unused_ops(
2537 token._replace(string=token.string.replace(prefix, ''))
2538 for token in tokenize.tokenize(stream.readline)))
2539 except tokenize.TokenError:
2540 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2542 class TokenIterator:
2543 def __init__(self, tokens):
2544 self.tokens = tokens
2545 self.counter = 0
2547 def __iter__(self):
2548 return self
2550 def __next__(self):
2551 if self.counter >= len(self.tokens):
2552 raise StopIteration
2553 value = self.tokens[self.counter]
2554 self.counter += 1
2555 return value
2557 next = __next__
2559 def restore_last_token(self):
2560 self.counter -= 1
2562 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2563 return _build_selector_function(parsed_selector)
2565 def _calc_headers(self, info_dict, load_cookies=False):
2566 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2567 clean_headers(res)
2569 if load_cookies: # For --load-info-json
2570 self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
2571 self._load_cookies(info_dict.get('cookies'), autoscope=False)
2572 # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2573 # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2574 res.pop('Cookie', None)
2575 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2576 if cookies:
2577 encoder = LenientSimpleCookie()
2578 values = []
2579 for cookie in cookies:
2580 _, value = encoder.value_encode(cookie.value)
2581 values.append(f'{cookie.name}={value}')
2582 if cookie.domain:
2583 values.append(f'Domain={cookie.domain}')
2584 if cookie.path:
2585 values.append(f'Path={cookie.path}')
2586 if cookie.secure:
2587 values.append('Secure')
2588 if cookie.expires:
2589 values.append(f'Expires={cookie.expires}')
2590 if cookie.version:
2591 values.append(f'Version={cookie.version}')
2592 info_dict['cookies'] = '; '.join(values)
2594 if 'X-Forwarded-For' not in res:
2595 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2596 if x_forwarded_for_ip:
2597 res['X-Forwarded-For'] = x_forwarded_for_ip
2599 return res
2601 def _calc_cookies(self, url):
2602 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2603 return self.cookiejar.get_cookie_header(url)
2605 def _sort_thumbnails(self, thumbnails):
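# Ascending sort: the most preferred thumbnail ends up last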
2606 thumbnails.sort(key=lambda t: (
2607 t.get('preference') if t.get('preference') is not None else -1,
2608 t.get('width') if t.get('width') is not None else -1,
2609 t.get('height') if t.get('height') is not None else -1,
2610 t.get('id') if t.get('id') is not None else '',
2611 t.get('url')))
2613 def _sanitize_thumbnails(self, info_dict):
2614 thumbnails = info_dict.get('thumbnails')
2615 if thumbnails is None:
2616 thumbnail = info_dict.get('thumbnail')
2617 if thumbnail:
2618 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2619 if not thumbnails:
2620 return
2622 def check_thumbnails(thumbnails):
2623 for t in thumbnails:
2624 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2625 try:
2626 self.urlopen(HEADRequest(t['url']))
2627 except network_exceptions as err:
2628 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2629 continue
2630 yield t
2632 self._sort_thumbnails(thumbnails)
2633 for i, t in enumerate(thumbnails):
2634 if t.get('id') is None:
2635 t['id'] = str(i)
2636 if t.get('width') and t.get('height'):
2637 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2638 t['url'] = sanitize_url(t['url'])
2640 if self.params.get('check_formats') is True:
2641 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2642 else:
2643 info_dict['thumbnails'] = thumbnails
2645 def _fill_common_fields(self, info_dict, final=True):
2646 # TODO: move sanitization here
2647 if final:
2648 title = info_dict['fulltitle'] = info_dict.get('title')
2649 if not title:
2650 if title == '':
2651 self.write_debug('Extractor gave empty title. Creating a generic title')
2652 else:
2653 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2654 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2656 if info_dict.get('duration') is not None:
2657 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2659 for ts_key, date_key in (
2660 ('timestamp', 'upload_date'),
2661 ('release_timestamp', 'release_date'),
2662 ('modified_timestamp', 'modified_date'),
2663 ):
2664 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2665 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2666 # see http://bugs.python.org/issue1646728)
2667 with contextlib.suppress(ValueError, OverflowError, OSError):
2668 upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
2669 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2671 if not info_dict.get('release_year'):
2672 info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2674 live_keys = ('is_live', 'was_live')
2675 live_status = info_dict.get('live_status')
2676 if live_status is None:
2677 for key in live_keys:
2678 if info_dict.get(key) is False:
2679 continue
2680 if info_dict.get(key):
2681 live_status = key
2682 break
2683 if all(info_dict.get(key) is False for key in live_keys):
2684 live_status = 'not_live'
2685 if live_status:
2686 info_dict['live_status'] = live_status
2687 for key in live_keys:
2688 if info_dict.get(key) is None:
2689 info_dict[key] = (live_status == key)
2690 if live_status == 'post_live':
2691 info_dict['was_live'] = True
2693 # Auto generate title fields corresponding to the *_number fields when missing
2694 # in order to always have clean titles. This is very common for TV series.
2695 for field in ('chapter', 'season', 'episode'):
2696 if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field):
2697 info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number'])
2699 for old_key, new_key in self._deprecated_multivalue_fields.items():
2700 if new_key in info_dict and old_key in info_dict:
2701 if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json
2702 self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
2703 elif old_value := info_dict.get(old_key):
2704 info_dict[new_key] = old_value.split(', ')
2705 elif new_value := info_dict.get(new_key):
2706 info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
2708 def _raise_pending_errors(self, info):
2709 err = info.pop('__pending_error', None)
2710 if err:
2711 self.report_error(err, tb=False)
2713 def sort_formats(self, info_dict):
2714 formats = self._get_formats(info_dict)
2715 formats.sort(key=FormatSorter(
2716 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2718 def process_video_result(self, info_dict, download=True):
2719 assert info_dict.get('_type', 'video') == 'video'
2720 self._num_videos += 1
2722 if 'id' not in info_dict:
2723 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2724 elif not info_dict.get('id'):
2725 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2727 def report_force_conversion(field, field_not, conversion):
2728 self.report_warning(
2729 f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
2730 'there is an error in extractor')
2732 def sanitize_string_field(info, string_field):
2733 field = info.get(string_field)
2734 if field is None or isinstance(field, str):
2735 return
2736 report_force_conversion(string_field, 'a string', 'string')
2737 info[string_field] = str(field)
2739 def sanitize_numeric_fields(info):
2740 for numeric_field in self._NUMERIC_FIELDS:
2741 field = info.get(numeric_field)
2742 if field is None or isinstance(field, (int, float)):
2743 continue
2744 report_force_conversion(numeric_field, 'numeric', 'int')
2745 info[numeric_field] = int_or_none(field)
2747 sanitize_string_field(info_dict, 'id')
2748 sanitize_numeric_fields(info_dict)
2749 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2750 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2751 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2752 self.report_warning('"duration" field is negative, there is an error in extractor')
2754 chapters = info_dict.get('chapters') or []
2755 if chapters and chapters[0].get('start_time'):
2756 chapters.insert(0, {'start_time': 0})
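# Sentinel chapter: supplies end_time=0 for the first chapter's start and start_time=duration for the last chapter's end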
2758 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2759 for idx, (prev, current, next_) in enumerate(zip(
2760 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2761 if current.get('start_time') is None:
2762 current['start_time'] = prev.get('end_time')
2763 if not current.get('end_time'):
2764 current['end_time'] = next_.get('start_time')
2765 if not current.get('title'):
2766 current['title'] = f'<Untitled Chapter {idx}>'
2768 if 'playlist' not in info_dict:
2769 # It isn't part of a playlist
2770 info_dict['playlist'] = None
2771 info_dict['playlist_index'] = None
2773 self._sanitize_thumbnails(info_dict)
2775 thumbnail = info_dict.get('thumbnail')
2776 thumbnails = info_dict.get('thumbnails')
2777 if thumbnail:
2778 info_dict['thumbnail'] = sanitize_url(thumbnail)
2779 elif thumbnails:
2780 info_dict['thumbnail'] = thumbnails[-1]['url']
2782 if info_dict.get('display_id') is None and 'id' in info_dict:
2783 info_dict['display_id'] = info_dict['id']
2785 self._fill_common_fields(info_dict)
2787 for cc_kind in ('subtitles', 'automatic_captions'):
2788 cc = info_dict.get(cc_kind)
2789 if cc:
2790 for _, subtitle in cc.items():
2791 for subtitle_format in subtitle:
2792 if subtitle_format.get('url'):
2793 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2794 if subtitle_format.get('ext') is None:
2795 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2797 automatic_captions = info_dict.get('automatic_captions')
2798 subtitles = info_dict.get('subtitles')
2800 info_dict['requested_subtitles'] = self.process_subtitles(
2801 info_dict['id'], subtitles, automatic_captions)
2803 formats = self._get_formats(info_dict)
2805 # Backward compatibility with InfoExtractor._sort_formats
2806 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2807 if field_preference:
2808 info_dict['_format_sort_fields'] = field_preference
2810 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2811 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2812 if not self.params.get('allow_unplayable_formats'):
2813 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2815 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2816 msg = 'only images are available for download. Use --list-formats to see them'
2817 if info_dict['_has_drm']:
2818 msg = 'This video is DRM protected and ' + msg
2819 self.report_warning(msg[0].upper() + msg[1:])
2820 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2821 if not get_from_start:
2822 info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
2823 if info_dict.get('is_live') and formats:
2824 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2825 if get_from_start and not formats:
2826 self.raise_no_formats(info_dict, msg=(
2827 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2828 'If you want to download from the current time, use --no-live-from-start'))
2830 def is_wellformed(f):
2831 url = f.get('url')
2832 if not url:
2833 self.report_warning(
2834 '"url" field is missing or empty - skipping format, '
2835 'there is an error in extractor')
2836 return False
2837 if isinstance(url, bytes):
2838 sanitize_string_field(f, 'url')
2839 return True
2841 # Filter out malformed formats for better extraction robustness
2842 formats = list(filter(is_wellformed, formats or []))
2844 if not formats:
2845 self.raise_no_formats(info_dict)
2847 for fmt in formats:
2848 sanitize_string_field(fmt, 'format_id')
2849 sanitize_numeric_fields(fmt)
2850 fmt['url'] = sanitize_url(fmt['url'])
2851 FormatSorter._fill_sorting_fields(fmt)
2852 if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
2853 if fmt.get('acodec') is None:
2854 fmt['acodec'] = fmt['ext']
2855 if fmt.get('resolution') is None:
2856 fmt['resolution'] = self.format_resolution(fmt, default=None)
2857 if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
2858 fmt['dynamic_range'] = 'SDR'
2859 if fmt.get('aspect_ratio') is None:
2860 fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
2861 # For fragmented formats, "tbr" is often max bitrate and not average
2862 if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
2863 and not fmt.get('filesize') and not fmt.get('filesize_approx')):
2864 fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
2865 fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)
2867 # Safeguard against old/insecure infojson when using --load-info-json
2868 if info_dict.get('http_headers'):
2869 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2870 info_dict['http_headers'].pop('Cookie', None)
2872 # This is copied to http_headers by the above _calc_headers and can now be removed
2873 if '__x_forwarded_for_ip' in info_dict:
2874 del info_dict['__x_forwarded_for_ip']
2876 self.sort_formats({
2877 'formats': formats,
2878 '_format_sort_fields': info_dict.get('_format_sort_fields'),
2879 })
2881 # Sanitize and group by format_id
2882 formats_dict = {}
2883 for i, fmt in enumerate(formats):
2884 if not fmt.get('format_id'):
2885 fmt['format_id'] = str(i)
2886 else:
2887 # Sanitize format_id from characters used in format selector expression
2888 fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
2889 formats_dict.setdefault(fmt['format_id'], []).append(fmt)
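# Illustrative example: characters meaningful in format selector expressions are
# replaced, so a hypothetical format_id like 'hls,720p+audio' is grouped under
# the key 'hls_720p_audio' in formats_dict.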
2891 # Make sure all formats have unique format_id
2892 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2893 for format_id, ambiguous_formats in formats_dict.items():
2894 ambiguous_id = len(ambiguous_formats) > 1
2895 for i, fmt in enumerate(ambiguous_formats):
2896 if ambiguous_id:
2897 fmt['format_id'] = f'{format_id}-{i}'
2898 # Ensure there is no conflict between id and ext in format selection
2899 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2900 if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
2901 fmt['format_id'] = 'f{}'.format(fmt['format_id'])
2903 if fmt.get('format') is None:
2904 fmt['format'] = '{id} - {res}{note}'.format(
2905 id=fmt['format_id'],
2906 res=self.format_resolution(fmt),
2907 note=format_field(fmt, 'format_note', ' (%s)'),
2908 )
2910 if self.params.get('check_formats') is True:
2911 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2913 if not formats or formats[0] is not info_dict:
2914 # Only set the 'formats' field if the original info_dict lists them;
2915 # otherwise we end up with a circular reference: the first (and only)
2916 # element in the 'formats' field of info_dict would be info_dict itself,
2917 # which can't be exported to JSON
2918 info_dict['formats'] = formats
2920 info_dict, _ = self.pre_process(info_dict)
2922 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2923 return info_dict
2925 self.post_extract(info_dict)
2926 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2928 # The pre-processors may have modified the formats
2929 formats = self._get_formats(info_dict)
2931 list_only = self.params.get('simulate') == 'list_only'
2932 interactive_format_selection = not list_only and self.format_selector == '-'
2933 if self.params.get('list_thumbnails'):
2934 self.list_thumbnails(info_dict)
2935 if self.params.get('listsubtitles'):
2936 if 'automatic_captions' in info_dict:
2937 self.list_subtitles(
2938 info_dict['id'], automatic_captions, 'automatic captions')
2939 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2940 if self.params.get('listformats') or interactive_format_selection:
2941 self.list_formats(info_dict)
2942 if list_only:
2943 # Without this printing, -F --print-json will not work
2944 self.__forced_printings(info_dict)
2945 return info_dict
2947 format_selector = self.format_selector
2948 while True:
2949 if interactive_format_selection:
2950 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2951 + '(Press ENTER for default, or Ctrl+C to quit)'
2952 + self._format_screen(': ', self.Styles.EMPHASIS))
2953 try:
2954 format_selector = self.build_format_selector(req_format) if req_format else None
2955 except SyntaxError as err:
2956 self.report_error(err, tb=False, is_error=False)
2957 continue
2959 if format_selector is None:
2960 req_format = self._default_format_spec(info_dict)
2961 self.write_debug(f'Default format spec: {req_format}')
2962 format_selector = self.build_format_selector(req_format)
2964 formats_to_download = self._select_formats(formats, format_selector)
2965 if interactive_format_selection and not formats_to_download:
2966 self.report_error('Requested format is not available', tb=False, is_error=False)
2967 continue
2968 break
2970 if not formats_to_download:
2971 if not self.params.get('ignore_no_formats_error'):
2972 raise ExtractorError(
2973 'Requested format is not available. Use --list-formats for a list of available formats',
2974 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2975 self.report_warning('Requested format is not available')
2976 # Process what we can, even without any available formats.
2977 formats_to_download = [{}]
2979 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2980 best_format, downloaded_formats = formats_to_download[-1], []
2981 if download:
2982 if best_format and requested_ranges:
2983 def to_screen(*msg):
2984 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2986 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2987 (f['format_id'] for f in formats_to_download))
2988 if requested_ranges != ({}, ):
2989 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2990 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2991 max_downloads_reached = False
2993 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2994 new_info = self._copy_infodict(info_dict)
2995 new_info.update(fmt)
2996 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2997 end_time = offset + min(chapter.get('end_time', duration), duration)
2998 # 'duration' may not be accurate, so allow deviations of <1 sec
2999 if end_time == float('inf') or end_time > offset + duration + 1:
3000 end_time = None
3001 if chapter or offset:
3002 new_info.update({
3003 'section_start': offset + chapter.get('start_time', 0),
3004 'section_end': end_time,
3005 'section_title': chapter.get('title'),
3006 'section_number': chapter.get('index'),
3007 })
3008 downloaded_formats.append(new_info)
3009 try:
3010 self.process_info(new_info)
3011 except MaxDownloadsReached:
3012 max_downloads_reached = True
3013 self._raise_pending_errors(new_info)
3014 # Remove copied info
3015 for key, val in tuple(new_info.items()):
3016 if info_dict.get(key) == val:
3017 new_info.pop(key)
3018 if max_downloads_reached:
3019 break
3021 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
3022 assert write_archive.issubset({True, False, 'ignore'})
3023 if True in write_archive and False not in write_archive:
3024 self.record_download_archive(info_dict)
3026 info_dict['requested_downloads'] = downloaded_formats
3027 info_dict = self.run_all_pps('after_video', info_dict)
3028 if max_downloads_reached:
3029 raise MaxDownloadsReached
3031 # We update the info dict with the selected best quality format (backwards compatibility)
3032 info_dict.update(best_format)
3033 return info_dict
3035 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
3036 """Select the requested subtitles and their format"""
3037 available_subs, normal_sub_langs = {}, []
3038 if normal_subtitles and self.params.get('writesubtitles'):
3039 available_subs.update(normal_subtitles)
3040 normal_sub_langs = tuple(normal_subtitles.keys())
3041 if automatic_captions and self.params.get('writeautomaticsub'):
3042 for lang, cap_info in automatic_captions.items():
3043 if lang not in available_subs:
3044 available_subs[lang] = cap_info
3046 if not available_subs or (
3047 not self.params.get('writesubtitles')
3048 and not self.params.get('writeautomaticsub')):
3049 return None
3051 all_sub_langs = tuple(available_subs.keys())
3052 if self.params.get('allsubtitles', False):
3053 requested_langs = all_sub_langs
3054 elif self.params.get('subtitleslangs', False):
3055 try:
3056 requested_langs = orderedSet_from_options(
3057 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
3058 except re.error as e:
3059 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
3060 else:
3061 requested_langs = LazyList(itertools.chain(
3062 ['en'] if 'en' in normal_sub_langs else [],
3063 filter(lambda f: f.startswith('en'), normal_sub_langs),
3064 ['en'] if 'en' in all_sub_langs else [],
3065 filter(lambda f: f.startswith('en'), all_sub_langs),
3066 normal_sub_langs, all_sub_langs,
3067 ))[:1]
3068 if requested_langs:
3069 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
3071 formats_query = self.params.get('subtitlesformat', 'best')
3072 formats_preference = formats_query.split('/') if formats_query else []
3073 subs = {}
3074 for lang in requested_langs:
3075 formats = available_subs.get(lang)
3076 if formats is None:
3077 self.report_warning(f'{lang} subtitles not available for {video_id}')
3078 continue
3079 for ext in formats_preference:
3080 if ext == 'best':
3081 f = formats[-1]
3082 break
3083 matches = list(filter(lambda f: f['ext'] == ext, formats))
3084 if matches:
3085 f = matches[-1]
3086 break
3087 else:
3088 f = formats[-1]
3089 self.report_warning(
3090 'No subtitle format found matching "{}" for language {}, '
3091 'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext']))
3092 subs[lang] = f
3093 return subs
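# Illustrative examples: with no 'subtitleslangs' requested and
# normal_sub_langs == ('fr', 'en-US'), the default chain above selects
# requested_langs == ['en-US'] (an exact 'en' would win if present).
# With --sub-langs 'all,-live_chat', orderedSet_from_options expands 'all'
# to every available language and then removes 'live_chat'.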
3095 def _forceprint(self, key, info_dict):
3096 if info_dict is None:
3097 return
3098 info_copy = info_dict.copy()
3099 info_copy.setdefault('filename', self.prepare_filename(info_dict))
3100 if info_dict.get('requested_formats') is not None:
3101 # For RTMP URLs, also include the playpath
3102 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3103 elif info_dict.get('url'):
3104 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3105 info_copy['formats_table'] = self.render_formats_table(info_dict)
3106 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3107 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3108 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3110 def format_tmpl(tmpl):
3111 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3112 if not mobj:
3113 return tmpl
3115 fmt = '%({})s'
3116 if tmpl.startswith('{'):
3117 tmpl, fmt = f'.{tmpl}', '%({})j'
3118 if tmpl.endswith('='):
3119 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3120 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
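# Illustrative examples of what format_tmpl produces for --print arguments:
#   'id,title'   -> '%(id)s\n%(title)s'   (each field on its own line)
#   'title='     -> 'title = %(title)#j'  (trailing '=' prints the name and JSON value)
#   '{id,title}' -> '%(.{id,title})j'     (braces print the fields as a JSON dict)
# Anything else, e.g. 'Downloading %(title)s', is already a template and is
# returned unchanged.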
3122 for tmpl in self.params['forceprint'].get(key, []):
3123 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3125 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3126 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3127 tmpl = format_tmpl(tmpl)
3128 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3129 if self._ensure_dir_exists(filename):
3130 with open(filename, 'a', encoding='utf-8', newline='') as f:
3131 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3133 return info_copy
3135 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3136 if (self.params.get('forcejson')
3137 or self.params['forceprint'].get('video')
3138 or self.params['print_to_file'].get('video')):
3139 self.post_extract(info_dict)
3140 if filename:
3141 info_dict['filename'] = filename
3142 info_copy = self._forceprint('video', info_dict)
3144 def print_field(field, actual_field=None, optional=False):
3145 if actual_field is None:
3146 actual_field = field
3147 if self.params.get(f'force{field}') and (
3148 info_copy.get(field) is not None or (not optional and not incomplete)):
3149 self.to_stdout(info_copy[actual_field])
3151 print_field('title')
3152 print_field('id')
3153 print_field('url', 'urls')
3154 print_field('thumbnail', optional=True)
3155 print_field('description', optional=True)
3156 print_field('filename')
3157 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3158 self.to_stdout(formatSeconds(info_copy['duration']))
3159 print_field('format')
3161 if self.params.get('forcejson'):
3162 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3164 def dl(self, name, info, subtitle=False, test=False):
3165 if not info.get('url'):
3166 self.raise_no_formats(info, True)
3168 if test:
3169 verbose = self.params.get('verbose')
3170 quiet = self.params.get('quiet') or not verbose
3171 params = {
3172 'test': True,
3173 'quiet': quiet,
3174 'verbose': verbose,
3175 'noprogress': quiet,
3176 'nopart': True,
3177 'skip_unavailable_fragments': False,
3178 'keep_fragments': False,
3179 'overwrites': True,
3180 '_no_ytdl_file': True,
3181 }
3182 else:
3183 params = self.params
3184 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3185 if not test:
3186 for ph in self._progress_hooks:
3187 fd.add_progress_hook(ph)
3188 urls = '", "'.join(
3189 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3190 for f in info.get('requested_formats', []) or [info])
3191 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3193 # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
3194 # but it may contain objects that are not deep-copyable
3195 new_info = self._copy_infodict(info)
3196 if new_info.get('http_headers') is None:
3197 new_info['http_headers'] = self._calc_headers(new_info)
3198 return fd.download(name, new_info, subtitle)
3200 def existing_file(self, filepaths, *, default_overwrite=True):
3201 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3202 if existing_files and not self.params.get('overwrites', default_overwrite):
3203 return existing_files[0]
3205 for file in existing_files:
3206 self.report_file_delete(file)
3207 os.remove(file)
3208 return None
3210 @_catch_unsafe_extension_error
3211 def process_info(self, info_dict):
3212 """Process a single resolved IE result. (Modifies it in-place)"""
3214 assert info_dict.get('_type', 'video') == 'video'
3215 original_infodict = info_dict
3217 if 'format' not in info_dict and 'ext' in info_dict:
3218 info_dict['format'] = info_dict['ext']
3220 if self._match_entry(info_dict) is not None:
3221 info_dict['__write_download_archive'] = 'ignore'
3222 return
3224 # Does nothing under normal operation - for backward compatibility of process_info
3225 self.post_extract(info_dict)
3227 def replace_info_dict(new_info):
3228 nonlocal info_dict
3229 if new_info == info_dict:
3230 return
3231 info_dict.clear()
3232 info_dict.update(new_info)
3234 new_info, _ = self.pre_process(info_dict, 'video')
3235 replace_info_dict(new_info)
3236 self._num_downloads += 1
3238 # info_dict['_filename'] needs to be set for backward compatibility
3239 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3240 temp_filename = self.prepare_filename(info_dict, 'temp')
3241 files_to_move = {}
3243 # Forced printings
3244 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3246 def check_max_downloads():
3247 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3248 raise MaxDownloadsReached
3250 if self.params.get('simulate'):
3251 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3252 check_max_downloads()
3253 return
3255 if full_filename is None:
3256 return
3257 if not self._ensure_dir_exists(full_filename):
3258 return
3259 if not self._ensure_dir_exists(temp_filename):
3260 return
3262 if self._write_description('video', info_dict,
3263 self.prepare_filename(info_dict, 'description')) is None:
3264 return
3266 sub_files = self._write_subtitles(info_dict, temp_filename)
3267 if sub_files is None:
3268 return
3269 files_to_move.update(dict(sub_files))
3271 thumb_files = self._write_thumbnails(
3272 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3273 if thumb_files is None:
3274 return
3275 files_to_move.update(dict(thumb_files))
3277 infofn = self.prepare_filename(info_dict, 'infojson')
3278 _infojson_written = self._write_info_json('video', info_dict, infofn)
3279 if _infojson_written:
3280 info_dict['infojson_filename'] = infofn
3281 # For backward compatibility, even though it was a private field
3282 info_dict['__infojson_filename'] = infofn
3283 elif _infojson_written is None:
3284 return
3286 # Note: Annotations are deprecated
3287 annofn = None
3288 if self.params.get('writeannotations', False):
3289 annofn = self.prepare_filename(info_dict, 'annotation')
3290 if annofn:
3291 if not self._ensure_dir_exists(annofn):
3292 return
3293 if not self.params.get('overwrites', True) and os.path.exists(annofn):
3294 self.to_screen('[info] Video annotations are already present')
3295 elif not info_dict.get('annotations'):
3296 self.report_warning('There are no annotations to write.')
3297 else:
3298 try:
3299 self.to_screen('[info] Writing video annotations to: ' + annofn)
3300 with open(annofn, 'w', encoding='utf-8') as annofile:
3301 annofile.write(info_dict['annotations'])
3302 except (KeyError, TypeError):
3303 self.report_warning('There are no annotations to write.')
3304 except OSError:
3305 self.report_error('Cannot write annotations file: ' + annofn)
3306 return
3308 # Write internet shortcut files
3309 def _write_link_file(link_type):
3310 url = try_get(info_dict['webpage_url'], iri_to_uri)
3311 if not url:
3312 self.report_warning(
3313 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3314 return True
3315 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3316 if not self._ensure_dir_exists(linkfn):
3317 return False
3318 if not self.params.get('overwrites', True) and os.path.exists(linkfn):
3319 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3320 return True
3321 try:
3322 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3323 with open(to_high_limit_path(linkfn), 'w', encoding='utf-8',
3324 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3325 template_vars = {'url': url}
3326 if link_type == 'desktop':
3327 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3328 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3329 except OSError:
3330 self.report_error(f'Cannot write internet shortcut {linkfn}')
3331 return False
3332 return True
3334 write_links = {
3335 'url': self.params.get('writeurllink'),
3336 'webloc': self.params.get('writewebloclink'),
3337 'desktop': self.params.get('writedesktoplink'),
3338 }
3339 if self.params.get('writelink'):
3340 link_type = ('webloc' if sys.platform == 'darwin'
3341 else 'desktop' if sys.platform.startswith('linux')
3342 else 'url')
3343 write_links[link_type] = True
3345 if any(should_write and not _write_link_file(link_type)
3346 for link_type, should_write in write_links.items()):
3347 return
3349 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3350 replace_info_dict(new_info)
3352 if self.params.get('skip_download'):
3353 info_dict['filepath'] = temp_filename
3354 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
3355 info_dict['__files_to_move'] = files_to_move
3356 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3357 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3358 else:
3359 # Download
3360 info_dict.setdefault('__postprocessors', [])
3361 try:
3363 def existing_video_file(*filepaths):
3364 ext = info_dict.get('ext')
3365 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3366 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3367 default_overwrite=False)
3368 if file:
3369 info_dict['ext'] = os.path.splitext(file)[1][1:]
3370 return file
3372 fd, success = None, True
3373 if info_dict.get('protocol') or info_dict.get('url'):
3374 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3375 if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3376 info_dict.get('section_start') or info_dict.get('section_end')):
3377 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3378 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3379 self.report_error(f'{msg}. Aborting')
3380 return
3382 if info_dict.get('requested_formats') is not None:
3383 old_ext = info_dict['ext']
3384 if self.params.get('merge_output_format') is None:
3385 if (info_dict['ext'] == 'webm'
3386 and info_dict.get('thumbnails')
3387 # check with type instead of pp_key, __name__, or isinstance
3388 # since we don't want any custom PPs to trigger this
3389 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3390 info_dict['ext'] = 'mkv'
3391 self.report_warning(
3392 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3393 new_ext = info_dict['ext']
3395 def correct_ext(filename, ext=new_ext):
3396 if filename == '-':
3397 return filename
3398 filename_real_ext = os.path.splitext(filename)[1][1:]
3399 filename_wo_ext = (
3400 os.path.splitext(filename)[0]
3401 if filename_real_ext in (old_ext, new_ext)
3402 else filename)
3403 return f'{filename_wo_ext}.{ext}'
3405 # Ensure filename always has a correct extension for successful merge
3406 full_filename = correct_ext(full_filename)
3407 temp_filename = correct_ext(temp_filename)
3408 dl_filename = existing_video_file(full_filename, temp_filename)
3410 info_dict['__real_download'] = False
3411 # NOTE: Copy so that original format dicts are not modified
3412 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3414 merger = FFmpegMergerPP(self)
3415 downloaded = []
3416 if dl_filename is not None:
3417 self.report_file_already_downloaded(dl_filename)
3418 elif fd:
3419 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3420 f['filepath'] = fname = prepend_extension(
3421 correct_ext(temp_filename, info_dict['ext']),
3422 'f{}'.format(f['format_id']), info_dict['ext'])
3423 downloaded.append(fname)
3424 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3425 success, real_download = self.dl(temp_filename, info_dict)
3426 info_dict['__real_download'] = real_download
3427 else:
3428 if self.params.get('allow_unplayable_formats'):
3429 self.report_warning(
3430 'You have requested merging of multiple formats '
3431 'while also allowing unplayable formats to be downloaded. '
3432 'The formats won\'t be merged to prevent data corruption.')
3433 elif not merger.available:
3434 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3435 if not self.params.get('ignoreerrors'):
3436 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3437 return
3438 self.report_warning(f'{msg}. The formats won\'t be merged')
3440 if temp_filename == '-':
3441 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3442 else 'but the formats are incompatible for simultaneous download' if merger.available
3443 else 'but ffmpeg is not installed')
3444 self.report_warning(
3445 f'You have requested downloading multiple formats to stdout {reason}. '
3446 'The formats will be streamed one after the other')
3447 fname = temp_filename
3448 for f in info_dict['requested_formats']:
3449 new_info = dict(info_dict)
3450 del new_info['requested_formats']
3451 new_info.update(f)
3452 if temp_filename != '-':
3453 fname = prepend_extension(
3454 correct_ext(temp_filename, new_info['ext']),
3455 'f{}'.format(f['format_id']), new_info['ext'])
3456 if not self._ensure_dir_exists(fname):
3457 return
3458 f['filepath'] = fname
3459 downloaded.append(fname)
3460 partial_success, real_download = self.dl(fname, new_info)
3461 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3462 success = success and partial_success
3464 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3465 info_dict['__postprocessors'].append(merger)
3466 info_dict['__files_to_merge'] = downloaded
3467 # Even if there were no downloads, it is being merged only now
3468 info_dict['__real_download'] = True
3469 else:
3470 for file in downloaded:
3471 files_to_move[file] = None
3472 else:
3473 # Just a single file
3474 dl_filename = existing_video_file(full_filename, temp_filename)
3475 if dl_filename is None or dl_filename == temp_filename:
3476 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3477 # So we should try to resume the download
3478 success, real_download = self.dl(temp_filename, info_dict)
3479 info_dict['__real_download'] = real_download
3480 else:
3481 self.report_file_already_downloaded(dl_filename)
3483 dl_filename = dl_filename or temp_filename
3484 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
3486 except network_exceptions as err:
3487 self.report_error(f'unable to download video data: {err}')
3488 return
3489 except OSError as err:
3490 raise UnavailableVideoError(err)
3491 except ContentTooShortError as err:
3492 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3493 return
3495 self._raise_pending_errors(info_dict)
3496 if success and full_filename != '-':
3498 def fixup():
3499 do_fixup = True
3500 fixup_policy = self.params.get('fixup')
3501 vid = info_dict['id']
3503 if fixup_policy in ('ignore', 'never'):
3504 return
3505 elif fixup_policy == 'warn':
3506 do_fixup = 'warn'
3507 elif fixup_policy != 'force':
3508 assert fixup_policy in ('detect_or_warn', None)
3509 if not info_dict.get('__real_download'):
3510 do_fixup = False
3512 def ffmpeg_fixup(cndn, msg, cls):
3513 if not (do_fixup and cndn):
3514 return
3515 elif do_fixup == 'warn':
3516 self.report_warning(f'{vid}: {msg}')
3517 return
3518 pp = cls(self)
3519 if pp.available:
3520 info_dict['__postprocessors'].append(pp)
3521 else:
3522 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3524 stretched_ratio = info_dict.get('stretched_ratio')
3525 ffmpeg_fixup(stretched_ratio not in (1, None),
3526 f'Non-uniform pixel ratio {stretched_ratio}',
3527 FFmpegFixupStretchedPP)
3529 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3530 downloader = downloader.FD_NAME if downloader else None
3532 ext = info_dict.get('ext')
3533 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3534 isinstance(pp, FFmpegVideoConvertorPP)
3535 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3536 ) for pp in self._pps['post_process'])
3538 if not postprocessed_by_ffmpeg:
3539 ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
3540 and info_dict.get('container') == 'm4a_dash',
3541 'writing DASH m4a. Only some players support this container',
3542 FFmpegFixupM4aPP)
3543 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3544 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3545 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3546 FFmpegFixupM3u8PP)
3547 ffmpeg_fixup(downloader == 'dashsegments'
3548 and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
3549 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3551 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3552 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3554 fixup()
3555 try:
3556 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3557 except PostProcessingError as err:
3558 self.report_error(f'Postprocessing: {err}')
3559 return
3560 try:
3561 for ph in self._post_hooks:
3562 ph(info_dict['filepath'])
3563 except Exception as err:
3564 self.report_error(f'post hooks: {err}')
3565 return
3566 info_dict['__write_download_archive'] = True
3568 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3569 if self.params.get('force_write_download_archive'):
3570 info_dict['__write_download_archive'] = True
3571 check_max_downloads()
3573 def __download_wrapper(self, func):
3574 @functools.wraps(func)
3575 def wrapper(*args, **kwargs):
3576 try:
3577 res = func(*args, **kwargs)
3578 except CookieLoadError:
3579 raise
3580 except UnavailableVideoError as e:
3581 self.report_error(e)
3582 except DownloadCancelled as e:
3583 self.to_screen(f'[info] {e}')
3584 if not self.params.get('break_per_url'):
3585 raise
3586 self._num_downloads = 0
3587 else:
3588 if self.params.get('dump_single_json', False):
3589 self.post_extract(res)
3590 self.to_stdout(json.dumps(self.sanitize_info(res)))
3591 return wrapper
3593 def download(self, url_list):
3594 """Download a given list of URLs."""
3595 url_list = variadic(url_list) # Passing a single URL is a common mistake
3596 outtmpl = self.params['outtmpl']['default']
3597 if (len(url_list) > 1
3598 and outtmpl != '-'
3599 and '%' not in outtmpl
3600 and self.params.get('max_downloads') != 1):
3601 raise SameFileError(outtmpl)
3603 for url in url_list:
3604 self.__download_wrapper(self.extract_info)(
3605 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3607 return self._download_retcode
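# Minimal embedding sketch (hypothetical options and URL), using the public
# API as documented in the README:
#   import yt_dlp
#   with yt_dlp.YoutubeDL({'outtmpl': '%(id)s.%(ext)s'}) as ydl:
#       retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])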
3609 def download_with_info_file(self, info_filename):
3610 with contextlib.closing(fileinput.FileInput(
3611 [info_filename], mode='r',
3612 openhook=fileinput.hook_encoded('utf-8'))) as f:
3613 # FileInput doesn't have a read method, so we can't call json.load
3614 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3615 for info in variadic(json.loads('\n'.join(f)))]
3616 for info in infos:
3617 try:
3618 self.__download_wrapper(self.process_ie_result)(info, download=True)
3619 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3620 if not isinstance(e, EntryNotInPlaylist):
3621 self.to_stderr('\r')
3622 webpage_url = info.get('webpage_url')
3623 if webpage_url is None:
3624 raise
3625 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3626 self.download([webpage_url])
3627 except ExtractorError as e:
3628 self.report_error(e)
3629 return self._download_retcode
3631 @staticmethod
3632 def sanitize_info(info_dict, remove_private_keys=False):
3633 """ Sanitize the infodict for converting to json """
3634 if info_dict is None:
3635 return info_dict
3636 info_dict.setdefault('epoch', int(time.time()))
3637 info_dict.setdefault('_type', 'video')
3638 info_dict.setdefault('_version', {
3639 'version': __version__,
3640 'current_git_head': current_git_head(),
3641 'release_git_head': RELEASE_GIT_HEAD,
3642 'repository': ORIGIN,
3643 })
3645 if remove_private_keys:
3646 reject = lambda k, v: v is None or k.startswith('__') or k in {
3647 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3648 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3649 'playlist_autonumber',
3650 }
3651 else:
3652 reject = lambda k, v: False
3654 def filter_fn(obj):
3655 if isinstance(obj, dict):
3656 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3657 elif isinstance(obj, (list, tuple, set, LazyList)):
3658 return list(map(filter_fn, obj))
3659 elif obj is None or isinstance(obj, (str, int, float, bool)):
3660 return obj
3661 else:
3662 return repr(obj)
3664 return filter_fn(info_dict)
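# Illustrative example ('when' is a hypothetical field): values that are not
# JSON-representable are replaced by their repr(), and remove_private_keys=True
# drops internal fields such as 'filepath' and anything starting with '__':
#   YoutubeDL.sanitize_info({'id': 'x', 'filepath': 'x.mp4',
#                            'when': dt.datetime(2024, 1, 1)}, True)
#   -> {'id': 'x', 'when': 'datetime.datetime(2024, 1, 1, 0, 0)', ...}
# ('epoch', '_type' and '_version' defaults are also added, in-place).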
3666 @staticmethod
3667 def filter_requested_info(info_dict, actually_filter=True):
3668 """ Alias of sanitize_info for backward compatibility """
3669 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3671 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3672 for filename in set(filter(None, files_to_delete)):
3673 if msg:
3674 self.to_screen(msg % filename)
3675 try:
3676 os.remove(filename)
3677 except OSError:
3678 self.report_warning(f'Unable to delete file {filename}')
3679 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3680 del info['__files_to_move'][filename]
3682 @staticmethod
3683 def post_extract(info_dict):
3684 def actual_post_extract(info_dict):
3685 if info_dict.get('_type') in ('playlist', 'multi_video'):
3686 for video_dict in info_dict.get('entries', {}):
3687 actual_post_extract(video_dict or {})
3688 return
3690 post_extractor = info_dict.pop('__post_extractor', None) or dict
3691 info_dict.update(post_extractor())
3693 actual_post_extract(info_dict or {})
3695 def run_pp(self, pp, infodict):
3696 files_to_delete = []
3697 if '__files_to_move' not in infodict:
3698 infodict['__files_to_move'] = {}
3699 try:
3700 files_to_delete, infodict = pp.run(infodict)
3701 except PostProcessingError as e:
3702 # Must be True and not 'only_download'
3703 if self.params.get('ignoreerrors') is True:
3704 self.report_error(e)
3705 return infodict
3706 raise
3708 if not files_to_delete:
3709 return infodict
3710 if self.params.get('keepvideo', False):
3711 for f in files_to_delete:
3712 infodict['__files_to_move'].setdefault(f, '')
3713 else:
3714 self._delete_downloaded_files(
3715 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3716 return infodict
3718 def run_all_pps(self, key, info, *, additional_pps=None):
3719 if key != 'video':
3720 self._forceprint(key, info)
3721 for pp in (additional_pps or []) + self._pps[key]:
3722 info = self.run_pp(pp, info)
3723 return info
3725 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3726 info = dict(ie_info)
3727 info['__files_to_move'] = files_to_move or {}
3728 try:
3729 info = self.run_all_pps(key, info)
3730 except PostProcessingError as err:
3731 msg = f'Preprocessing: {err}'
3732 info.setdefault('__pending_error', msg)
3733 self.report_error(msg, is_error=False)
3734 return info, info.pop('__files_to_move', None)
3736 def post_process(self, filename, info, files_to_move=None):
3737 """Run all the postprocessors on the given file."""
3738 info['filepath'] = filename
3739 info['__files_to_move'] = files_to_move or {}
3740 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3741 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3742 del info['__files_to_move']
3743 return self.run_all_pps('after_move', info)
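# Rough sketch of the postprocessing lifecycle for a single video (the full
# list of stages is POSTPROCESS_WHEN in utils): 'pre_process' and
# 'after_filter' PPs run during extraction, 'video' and 'before_dl' run before
# the download, then post_process() runs the 'post_process' PPs, moves files
# and runs 'after_move' PPs, and 'after_video' PPs run once all formats are done.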
3745 def _make_archive_id(self, info_dict):
3746 video_id = info_dict.get('id')
3747 if not video_id:
3748 return
3749 # Future-proof against any change in case
3750 # and for backwards compatibility with prior versions
3751 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3752 if extractor is None:
3753 url = str_or_none(info_dict.get('url'))
3754 if not url:
3755 return
3756 # Try to find matching extractor for the URL and take its ie_key
3757 for ie_key, ie in self._ies.items():
3758 if ie.suitable(url):
3759 extractor = ie_key
3760 break
3761 else:
3762 return
3763 return make_archive_id(extractor, video_id)
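# Illustrative example: make_archive_id() lowercases the extractor key, so
# {'id': 'BaW_jenozKc', 'extractor_key': 'Youtube'} yields the archive entry
# 'youtube BaW_jenozKc', the same line format that record_download_archive()
# below appends to the archive file.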
3765 def in_download_archive(self, info_dict):
3766 if not self.archive:
3767 return False
3769 vid_ids = [self._make_archive_id(info_dict)]
3770 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3771 return any(id_ in self.archive for id_ in vid_ids)
3773 def record_download_archive(self, info_dict):
3774 fn = self.params.get('download_archive')
3775 if fn is None:
3776 return
3777 vid_id = self._make_archive_id(info_dict)
3778 assert vid_id
3780 self.write_debug(f'Adding to archive: {vid_id}')
3781 if is_path_like(fn):
3782 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3783 archive_file.write(vid_id + '\n')
3784 self.archive.add(vid_id)
3786 @staticmethod
3787 def format_resolution(format, default='unknown'):
3788 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3789 return 'audio only'
3790 if format.get('resolution') is not None:
3791 return format['resolution']
3792 if format.get('width') and format.get('height'):
3793 return '%dx%d' % (format['width'], format['height'])
3794 elif format.get('height'):
3795 return '{}p'.format(format['height'])
3796 elif format.get('width'):
3797 return '%dx?' % format['width']
3798 return default
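# Illustrative examples of format_resolution():
#   {'vcodec': 'none', 'acodec': 'mp4a.40.2'}  -> 'audio only'
#   {'width': 1920, 'height': 1080}            -> '1920x1080'
#   {'height': 720}                            -> '720p'
#   {'width': 640}                             -> '640x?'
#   {}                                         -> 'unknown' (the default)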
3800 def _list_format_headers(self, *headers):
3801 if self.params.get('listformats_table', True) is not False:
3802 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3803 return headers
3805 def _format_note(self, fdict):
3806 res = ''
3807 if fdict.get('ext') in ['f4f', 'f4m']:
3808 res += '(unsupported)'
3809 if fdict.get('language'):
3810 if res:
3811 res += ' '
3812 res += '[{}]'.format(fdict['language'])
3813 if fdict.get('format_note') is not None:
3814 if res:
3815 res += ' '
3816 res += fdict['format_note']
3817 if fdict.get('tbr') is not None:
3818 if res:
3819 res += ', '
3820 res += '%4dk' % fdict['tbr']
3821 if fdict.get('container') is not None:
3822 if res:
3823 res += ', '
3824 res += '{} container'.format(fdict['container'])
3825 if (fdict.get('vcodec') is not None
3826 and fdict.get('vcodec') != 'none'):
3827 if res:
3828 res += ', '
3829 res += fdict['vcodec']
3830 if fdict.get('vbr') is not None:
3831 res += '@'
3832 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3833 res += 'video@'
3834 if fdict.get('vbr') is not None:
3835 res += '%4dk' % fdict['vbr']
3836 if fdict.get('fps') is not None:
3837 if res:
3838 res += ', '
3839 res += '{}fps'.format(fdict['fps'])
3840 if fdict.get('acodec') is not None:
3841 if res:
3842 res += ', '
3843 if fdict['acodec'] == 'none':
3844 res += 'video only'
3845 else:
3846 res += '%-5s' % fdict['acodec']
3847 elif fdict.get('abr') is not None:
3848 if res:
3849 res += ', '
3850 res += 'audio'
3851 if fdict.get('abr') is not None:
3852 res += '@%3dk' % fdict['abr']
3853 if fdict.get('asr') is not None:
3854 res += ' (%5dHz)' % fdict['asr']
3855 if fdict.get('filesize') is not None:
3856 if res:
3857 res += ', '
3858 res += format_bytes(fdict['filesize'])
3859 elif fdict.get('filesize_approx') is not None:
3860 if res:
3861 res += ', '
3862 res += '~' + format_bytes(fdict['filesize_approx'])
3863 return res
3865 def _get_formats(self, info_dict):
3866 if info_dict.get('formats') is None:
3867 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3868 return [info_dict]
3869 return []
3870 return info_dict['formats']
3872 def render_formats_table(self, info_dict):
3873 formats = self._get_formats(info_dict)
3874 if not formats:
3875 return
3876 if self.params.get('listformats_table', True) is False:
3877 table = [
3878 [
3879 format_field(f, 'format_id'),
3880 format_field(f, 'ext'),
3881 self.format_resolution(f),
3882 self._format_note(f),
3883 ] for f in formats if (f.get('preference') or 0) >= -1000]
3884 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3886 def simplified_codec(f, field):
3887 assert field in ('acodec', 'vcodec')
3888 codec = f.get(field)
3889 if not codec:
3890 return 'unknown'
3891 elif codec != 'none':
3892 return '.'.join(codec.split('.')[:4])
3894 if field == 'vcodec' and f.get('acodec') == 'none':
3895 return 'images'
3896 elif field == 'acodec' and f.get('vcodec') == 'none':
3897 return ''
3898 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3899 self.Styles.SUPPRESS)
3901 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3902 table = [
3903 [
3904 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3905 format_field(f, 'ext'),
3906 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3907 format_field(f, 'fps', '\t%d', func=round),
3908 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3909 format_field(f, 'audio_channels', '\t%s'),
3910 delim, (
3911 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3912 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3913 or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
3914 self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)),
3915 format_field(f, 'tbr', '\t%dk', func=round),
3916 shorten_protocol_name(f.get('protocol', '')),
3917 delim,
3918 simplified_codec(f, 'vcodec'),
3919 format_field(f, 'vbr', '\t%dk', func=round),
3920 simplified_codec(f, 'acodec'),
3921 format_field(f, 'abr', '\t%dk', func=round),
3922 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3923 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3924 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3925 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3926 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3927 format_field(f, 'format_note'),
3928 format_field(f, 'container', ignore=(None, f.get('ext'))),
3929 delim=', '), delim=' '),
3930 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3931 header_line = self._list_format_headers(
3932 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3933 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3935 return render_table(
3936 header_line, table, hide_empty=True,
3937 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3939 def render_thumbnails_table(self, info_dict):
3940 thumbnails = list(info_dict.get('thumbnails') or [])
3941 if not thumbnails:
3942 return None
3943 return render_table(
3944 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3945 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3947 def render_subtitles_table(self, video_id, subtitles):
3948 def _row(lang, formats):
3949 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3950 if len(set(names)) == 1:
3951 names = [] if names[0] == 'unknown' else names[:1]
3952 return [lang, ', '.join(names), ', '.join(exts)]
3954 if not subtitles:
3955 return None
3956 return render_table(
3957 self._list_format_headers('Language', 'Name', 'Formats'),
3958 [_row(lang, formats) for lang, formats in subtitles.items()],
3959 hide_empty=True)
3961 def __list_table(self, video_id, name, func, *args):
3962 table = func(*args)
3963 if not table:
3964 self.to_screen(f'{video_id} has no {name}')
3965 return
3966 self.to_screen(f'[info] Available {name} for {video_id}:')
3967 self.to_stdout(table)
3969 def list_formats(self, info_dict):
3970 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3972 def list_thumbnails(self, info_dict):
3973 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3975 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3976 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3978 def print_debug_header(self):
3979 if not self.params.get('verbose'):
3980 return
3982 from . import _IN_CLI # Must be delayed import
3984 # These imports can be slow. So import them only as needed
3985 from .extractor.extractors import _LAZY_LOADER
3986 from .extractor.extractors import (
3987 _PLUGIN_CLASSES as plugin_ies,
3988 _PLUGIN_OVERRIDES as plugin_ie_overrides,
3989 )
3991 def get_encoding(stream):
3992 ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
3993 additional_info = []
3994 if os.environ.get('TERM', '').lower() == 'dumb':
3995 additional_info.append('dumb')
3996 if not supports_terminal_sequences(stream):
3997 from .utils import WINDOWS_VT_MODE # Must be imported locally
3998 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3999 if additional_info:
4000 ret = f'{ret} ({",".join(additional_info)})'
4001 return ret
4003 encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
4004 locale.getpreferredencoding(),
4005 sys.getfilesystemencoding(),
4006 self.get_encoding(),
4007 ', '.join(
4008 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
4009 if stream is not None and key != 'console'),
4010 )
4012 logger = self.params.get('logger')
4013 if logger:
4014 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
4015 write_debug(encoding_str)
4016 else:
4017 write_string(f'[debug] {encoding_str}\n', encoding=None)
4018 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
4020 source = detect_variant()
4021 if VARIANT not in (None, 'pip'):
4022 source += '*'
4023 klass = type(self)
4024 write_debug(join_nonempty(
4025 f'{REPOSITORY.rpartition("/")[2]} version',
4026 _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
4027 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
4028 '' if source == 'unknown' else f'({source})',
4029 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
4030 delim=' '))
4032 if not _IN_CLI:
4033 write_debug(f'params: {self.params}')
4035 if not _LAZY_LOADER:
4036 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
4037 write_debug('Lazy loading extractors is forcibly disabled')
4038 else:
4039 write_debug('Lazy loading extractors is disabled')
4040 if self.params['compat_opts']:
4041 write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))
4043 if current_git_head():
4044 write_debug(f'Git HEAD: {current_git_head()}')
4045 write_debug(system_identifier())
4047 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
4048 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
4049 if ffmpeg_features:
4050 exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features)))
4052 exe_versions['rtmpdump'] = rtmpdump_version()
4053 exe_versions['phantomjs'] = PhantomJSwrapper._version()
4054 exe_str = ', '.join(
4055 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
4056 ) or 'none'
4057 write_debug(f'exe versions: {exe_str}')
4059 from .compat.compat_utils import get_package_info
4060 from .dependencies import available_dependencies
4062 write_debug('Optional libraries: %s' % (', '.join(sorted({
4063 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
4064 })) or 'none'))
4066 write_debug(f'Proxy map: {self.proxies}')
4067 write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
4068 if os.environ.get('YTDLP_NO_PLUGINS'):
4069 write_debug('Plugins are forcibly disabled')
4070 return
4072 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
4073 display_list = ['{}{}'.format(
4074 klass.__name__, '' if klass.__name__ == name else f' as {name}')
4075 for name, klass in plugins.items()]
4076 if plugin_type == 'Extractor':
4077 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
4078 for parent, plugins in plugin_ie_overrides.items())
4079 if not display_list:
4080 continue
4081 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
4083 plugin_dirs = plugin_directories()
4084 if plugin_dirs:
4085 write_debug(f'Plugin directories: {plugin_dirs}')
4087 @functools.cached_property
4088 def proxies(self):
4089 """Global proxy configuration"""
4090 opts_proxy = self.params.get('proxy')
4091 if opts_proxy is not None:
4092 if opts_proxy == '':
4093 opts_proxy = '__noproxy__'
4094 proxies = {'all': opts_proxy}
4095 else:
4096 proxies = urllib.request.getproxies()
4097 # compat. Set HTTPS_PROXY to __noproxy__ to revert
4098 if 'http' in proxies and 'https' not in proxies:
4099 proxies['https'] = proxies['http']
4101 return proxies
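# Illustrative examples: --proxy '' yields {'all': '__noproxy__'} (proxies
# disabled), while an environment with only HTTP_PROXY=http://127.0.0.1:8080
# yields {'http': 'http://127.0.0.1:8080', 'https': 'http://127.0.0.1:8080'}
# because of the compat copy above.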
4103 @functools.cached_property
4104 def cookiejar(self):
4105 """Global cookiejar instance"""
4106 try:
4107 return load_cookies(
4108 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4109 except CookieLoadError as error:
4110 cause = error.__context__
4111 # compat: <=py3.9: `traceback.format_exception` has a different signature
4112 self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)))
4113 raise
4115 @property
4116 def _opener(self):
4117 """
4118 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4119 """
4120 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4121 handler = self._request_director.handlers['Urllib']
4122 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4124 def _get_available_impersonate_targets(self):
4125 # TODO(future): make available as public API
4126 return [
4127 (target, rh.RH_NAME)
4128 for rh in self._request_director.handlers.values()
4129 if isinstance(rh, ImpersonateRequestHandler)
4130 for target in rh.supported_targets
4131 ]
4133 def _impersonate_target_available(self, target):
4134 # TODO(future): make available as public API
4135 return any(
4136 rh.is_supported_target(target)
4137 for rh in self._request_director.handlers.values()
4138 if isinstance(rh, ImpersonateRequestHandler))
4140 def urlopen(self, req):
4141 """ Start an HTTP download """
4142 if isinstance(req, str):
4143 req = Request(req)
4144 elif isinstance(req, urllib.request.Request):
4145 self.deprecation_warning(
4146 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4147 'Use yt_dlp.networking.common.Request instead.')
4148 req = urllib_req_to_req(req)
4149 assert isinstance(req, Request)
4151 # compat: Assume user:pass url params are basic auth
4152 url, basic_auth_header = extract_basic_auth(req.url)
4153 if basic_auth_header:
4154 req.headers['Authorization'] = basic_auth_header
4155 req.url = sanitize_url(url)
4157 clean_proxies(proxies=req.proxies, headers=req.headers)
4158 clean_headers(req.headers)
4160 try:
4161 return self._request_director.send(req)
4162 except NoSupportingHandlers as e:
4163 for ue in e.unsupported_errors:
4164 # FIXME: This depends on the order of errors.
4165 if not (ue.handler and ue.msg):
4166 continue
4167 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4168 raise RequestError(
4169 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4170 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4171 if (
4172 'unsupported proxy type: "https"' in ue.msg.lower()
4173 and 'requests' not in self._request_director.handlers
4174 and 'curl_cffi' not in self._request_director.handlers
4175 ):
4176 raise RequestError(
4177 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')
4179 elif (
4180 re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
4181 and 'websockets' not in self._request_director.handlers
4182 ):
4183 raise RequestError(
4184 'This request requires WebSocket support. '
4185 'Ensure one of the following dependencies are installed: websockets',
4186 cause=ue) from ue
4188 elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()):
4189 raise RequestError(
4190 f'Impersonate target "{req.extensions["impersonate"]}" is not available.'
4191 f' See --list-impersonate-targets for available targets.'
4192 f' This request requires browser impersonation, however you may be missing dependencies'
4193 f' required to support this target.')
4194 raise
4195 except SSLError as e:
4196 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4197 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4198 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4199 raise RequestError(
4200 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4201 'Try using --legacy-server-connect', cause=e) from e
4202 raise
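# Minimal usage sketch (hypothetical URL and header, assuming ydl is a
# YoutubeDL instance): urlopen() accepts a plain string or a
# yt_dlp.networking.Request and returns a response object supporting read():
#   req = Request('https://example.com/api', headers={'X-Debug': '1'})
#   with ydl.urlopen(req) as res:
#       body = res.read()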
4204 def build_request_director(self, handlers, preferences=None):
4205 logger = _YDLLogger(self)
4206 headers = self.params['http_headers'].copy()
4207 proxies = self.proxies.copy()
4208 clean_headers(headers)
4209 clean_proxies(proxies, headers)
4211 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4212 for handler in handlers:
4213 director.add_handler(handler(
4214 logger=logger,
4215 headers=headers,
4216 cookiejar=self.cookiejar,
4217 proxies=proxies,
4218 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4219 verify=not self.params.get('nocheckcertificate'),
4220 **traverse_obj(self.params, {
4221 'verbose': 'debug_printtraffic',
4222 'source_address': 'source_address',
4223 'timeout': 'socket_timeout',
4224 'legacy_ssl_support': 'legacyserverconnect',
4225 'enable_file_urls': 'enable_file_urls',
4226 'impersonate': 'impersonate',
4227 'client_cert': {
4228 'client_certificate': 'client_certificate',
4229 'client_certificate_key': 'client_certificate_key',
4230 'client_certificate_password': 'client_certificate_password',
4231 },
4232 }),
4233 ))
4234 director.preferences.update(preferences or [])
4235 if 'prefer-legacy-http-handler' in self.params['compat_opts']:
4236 director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
4237 return director
4239 @functools.cached_property
4240 def _request_director(self):
4241 return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
4243 def encode(self, s):
4244 if isinstance(s, bytes):
4245 return s # Already encoded
4247 try:
4248 return s.encode(self.get_encoding())
4249 except UnicodeEncodeError as err:
4250 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4251 raise
4253 def get_encoding(self):
4254 encoding = self.params.get('encoding')
4255 if encoding is None:
4256 encoding = preferredencoding()
4257 return encoding

    def _write_info_json(self, label, ie_result, infofn, overwrite=None):
        """ Write infojson and return: True = written, 'exists' = already exists, False = skipped, None = error """
        if overwrite is None:
            overwrite = self.params.get('overwrites', True)
        if not self.params.get('writeinfojson'):
            return False
        elif not infofn:
            self.write_debug(f'Skipping writing {label} infojson')
            return False
        elif not self._ensure_dir_exists(infofn):
            return None
        elif not overwrite and os.path.exists(infofn):
            self.to_screen(f'[info] {label.title()} metadata is already present')
            return 'exists'

        self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
        try:
            write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
            return True
        except OSError:
            self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
            return None
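
    # Illustrative sketch (not part of the original source): callers branch on the
    # tri-state return value, roughly like so (`infofn` hypothetical):
    #
    #     if self._write_info_json('video', info_dict, infofn) is None:
    #         return  # error was already reported; abort this download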

    def _write_description(self, label, ie_result, descfn):
        """ Write description and return: True = written, False = skipped, None = error """
        if not self.params.get('writedescription'):
            return False
        elif not descfn:
            self.write_debug(f'Skipping writing {label} description')
            return False
        elif not self._ensure_dir_exists(descfn):
            return None
        elif not self.params.get('overwrites', True) and os.path.exists(descfn):
            self.to_screen(f'[info] {label.title()} description is already present')
        elif ie_result.get('description') is None:
            self.to_screen(f'[info] There\'s no {label} description to write')
            return False
        else:
            try:
                self.to_screen(f'[info] Writing {label} description to: {descfn}')
                with open(descfn, 'w', encoding='utf-8') as descfile:
                    descfile.write(ie_result['description'])
            except OSError:
                self.report_error(f'Cannot write {label} description file {descfn}')
                return None
        return True

    def _write_subtitles(self, info_dict, filename):
        """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None on error """
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # Subtitle download errors are already handled in the relevant IE,
            # so this silently continues when used with an IE that lacks subtitle support
            return ret
        elif not subtitles:
            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret
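
    # Illustrative sketch (not part of the original source): each entry in
    # `requested_subtitles` carries either inline 'data' or a 'url' to download,
    # roughly shaped like (values hypothetical):
    #
    #     {'en': {'ext': 'vtt', 'data': 'WEBVTT\n\n00:00.000 --> 00:01.000\n...'},
    #      'de': {'ext': 'vtt', 'url': 'https://example.com/subs.de.vtt'}}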

    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None on error """
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        if thumbnails and not self._ensure_dir_exists(filename):
            return None

        # Thumbnails are sorted worst-to-best, so iterate in reverse
        # to try the most preferred thumbnail first
        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
            if multiple:
                thumb_ext = f'{t["id"]}.{thumb_ext}'
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] {} is already present'.format((
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
            if ret and not write_all:
                break
        return ret
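
    # Illustrative sketch (not part of the original source): since the thumbnails
    # list is ordered worst-to-best, plain writethumbnail writes only the best
    # available thumbnail. A minimal shape (values hypothetical):
    #
    #     [{'id': '0', 'url': 'https://example.com/low.jpg', 'preference': -1},
    #      {'id': '1', 'url': 'https://example.com/high.jpg'}]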