Release 2024.12.23
[yt-dlp.git] / yt_dlp / YoutubeDL.py
blob764baf3a00f0fe568a5163331913357d11d2c0a2
1 import collections
2 import contextlib
3 import copy
4 import datetime as dt
5 import errno
6 import fileinput
7 import functools
8 import http.cookiejar
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import random
16 import re
17 import shutil
18 import string
19 import subprocess
20 import sys
21 import tempfile
22 import time
23 import tokenize
24 import traceback
25 import unicodedata
27 from .cache import Cache
28 from .compat import urllib # isort: split
29 from .compat import urllib_req_to_req
30 from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
31 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
32 from .downloader.rtmp import rtmpdump_version
33 from .extractor import gen_extractor_classes, get_info_extractor
34 from .extractor.common import UnsupportedURLIE
35 from .extractor.openload import PhantomJSwrapper
36 from .minicurses import format_text
37 from .networking import HEADRequest, Request, RequestDirector
38 from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
39 from .networking.exceptions import (
40 HTTPError,
41 NoSupportingHandlers,
42 RequestError,
43 SSLError,
44 network_exceptions,
46 from .networking.impersonate import ImpersonateRequestHandler
47 from .plugins import directories as plugin_directories
48 from .postprocessor import _PLUGIN_CLASSES as plugin_pps
49 from .postprocessor import (
50 EmbedThumbnailPP,
51 FFmpegFixupDuplicateMoovPP,
52 FFmpegFixupDurationPP,
53 FFmpegFixupM3u8PP,
54 FFmpegFixupM4aPP,
55 FFmpegFixupStretchedPP,
56 FFmpegFixupTimestampPP,
57 FFmpegMergerPP,
58 FFmpegPostProcessor,
59 FFmpegVideoConvertorPP,
60 MoveFilesAfterDownloadPP,
61 get_postprocessor,
63 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
64 from .update import (
65 REPOSITORY,
66 _get_system_deprecation,
67 _make_label,
68 current_git_head,
69 detect_variant,
71 from .utils import (
72 DEFAULT_OUTTMPL,
73 IDENTITY,
74 LINK_TEMPLATES,
75 MEDIA_EXTENSIONS,
76 NO_DEFAULT,
77 NUMBER_RE,
78 OUTTMPL_TYPES,
79 POSTPROCESS_WHEN,
80 STR_FORMAT_RE_TMPL,
81 STR_FORMAT_TYPES,
82 ContentTooShortError,
83 DateRange,
84 DownloadCancelled,
85 DownloadError,
86 EntryNotInPlaylist,
87 ExistingVideoReached,
88 ExtractorError,
89 FormatSorter,
90 GeoRestrictedError,
91 ISO3166Utils,
92 LazyList,
93 MaxDownloadsReached,
94 Namespace,
95 PagedList,
96 PlaylistEntries,
97 Popen,
98 PostProcessingError,
99 ReExtractInfo,
100 RejectedVideoReached,
101 SameFileError,
102 UnavailableVideoError,
103 UserNotLive,
104 YoutubeDLError,
105 age_restricted,
106 bug_reports_message,
107 date_from_str,
108 deprecation_warning,
109 determine_ext,
110 determine_protocol,
111 encode_compat_str,
112 escapeHTML,
113 expand_path,
114 extract_basic_auth,
115 filter_dict,
116 float_or_none,
117 format_bytes,
118 format_decimal_suffix,
119 format_field,
120 formatSeconds,
121 get_compatible_ext,
122 get_domain,
123 int_or_none,
124 iri_to_uri,
125 is_path_like,
126 join_nonempty,
127 locked_file,
128 make_archive_id,
129 make_dir,
130 number_of_digits,
131 orderedSet,
132 orderedSet_from_options,
133 parse_filesize,
134 preferredencoding,
135 prepend_extension,
136 remove_terminal_sequences,
137 render_table,
138 replace_extension,
139 sanitize_filename,
140 sanitize_path,
141 sanitize_url,
142 shell_quote,
143 str_or_none,
144 strftime_or_none,
145 subtitles_filename,
146 supports_terminal_sequences,
147 system_identifier,
148 filesize_from_tbr,
149 timetuple_from_msec,
150 to_high_limit_path,
151 traverse_obj,
152 try_call,
153 try_get,
154 url_basename,
155 variadic,
156 windows_enable_vt_mode,
157 write_json_file,
158 write_string,
160 from .utils._utils import _UnsafeExtensionError, _YDLLogger
161 from .utils.networking import (
162 HTTPHeaderDict,
163 clean_headers,
164 clean_proxies,
165 std_headers,
167 from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
169 if os.name == 'nt':
170 import ctypes
173 def _catch_unsafe_extension_error(func):
174 @functools.wraps(func)
175 def wrapper(self, *args, **kwargs):
176 try:
177 return func(self, *args, **kwargs)
178 except _UnsafeExtensionError as error:
179 self.report_error(
180 f'The extracted extension ({error.extension!r}) is unusual '
181 'and will be skipped for safety reasons. '
182 f'If you believe this is an error{bug_reports_message(",")}')
184 return wrapper
187 class YoutubeDL:
188 """YoutubeDL class.
190 YoutubeDL objects are the ones responsible for downloading the
191 actual video file and writing it to disk if the user has requested
192 it, among some other tasks. In most cases there should be one per
193 program. As, given a video URL, the downloader doesn't know how to
194 extract all the needed information, task that InfoExtractors do, it
195 has to pass the URL to one of them.
197 For this, YoutubeDL objects have a method that allows
198 InfoExtractors to be registered in a given order. When it is passed
199 a URL, the YoutubeDL object hands it to the first InfoExtractor it
200 finds that reports being able to handle it. The InfoExtractor extracts
201 all the information about the video or videos the URL refers to, and
202 YoutubeDL processes the extracted information, possibly using a File
203 Downloader to download the video.
205 YoutubeDL objects accept a lot of parameters. In order not to saturate
206 the object constructor with arguments, it receives a dictionary of
207 options instead. These options are available through the params
208 attribute for the InfoExtractors to use. The YoutubeDL also
209 registers itself as the downloader in charge for the InfoExtractors
210 that are added to it, so this is a "mutual registration".
212 Available options:
214 username: Username for authentication purposes.
215 password: Password for authentication purposes.
216 videopassword: Password for accessing a video.
217 ap_mso: Adobe Pass multiple-system operator identifier.
218 ap_username: Multiple-system operator account username.
219 ap_password: Multiple-system operator account password.
220 usenetrc: Use netrc for authentication instead.
221 netrc_location: Location of the netrc file. Defaults to ~/.netrc.
222 netrc_cmd: Use a shell command to get credentials
223 verbose: Print additional info to stdout.
224 quiet: Do not print messages to stdout.
225 no_warnings: Do not print out anything for warnings.
226 forceprint: A dict with keys WHEN mapped to a list of templates to
227 print to stdout. The allowed keys are video or any of the
228 items in utils.POSTPROCESS_WHEN.
229 For compatibility, a single list is also accepted
230 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
231 a list of tuples with (template, filename)
232 forcejson: Force printing info_dict as JSON.
233 dump_single_json: Force printing the info_dict of the whole playlist
234 (or video) as a single JSON line.
235 force_write_download_archive: Force writing download archive regardless
236 of 'skip_download' or 'simulate'.
237 simulate: Do not download the video files. If unset (or None),
238 simulate only if listsubtitles, listformats or list_thumbnails is used
239 format: Video format code. see "FORMAT SELECTION" for more details.
240 You can also pass a function. The function takes 'ctx' as
241 argument and returns the formats to download.
242 See "build_format_selector" for an implementation
243 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
244 ignore_no_formats_error: Ignore "No video formats" error. Useful for
245 extracting metadata even if the video is not actually
246 available for download (experimental)
247 format_sort: A list of fields by which to sort the video formats.
248 See "Sorting Formats" for more details.
249 format_sort_force: Force the given format_sort. see "Sorting Formats"
250 for more details.
251 prefer_free_formats: Whether to prefer video formats with free containers
252 over non-free ones of the same quality.
253 allow_multiple_video_streams: Allow multiple video streams to be merged
254 into a single file
255 allow_multiple_audio_streams: Allow multiple audio streams to be merged
256 into a single file
257 check_formats: Whether to test if the formats are downloadable.
258 Can be True (check all), False (check none),
259 'selected' (check selected formats),
260 or None (check only if requested by extractor)
261 paths: Dictionary of output paths. The allowed keys are 'home'
262 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
263 outtmpl: Dictionary of templates for output names. Allowed keys
264 are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
265 For compatibility with youtube-dl, a single string can also be used
266 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
267 restrictfilenames: Do not allow "&" and spaces in file names
268 trim_file_name: Limit length of filename (extension excluded)
269 windowsfilenames: True: Force filenames to be Windows compatible
270 False: Sanitize filenames only minimally
271 This option has no effect when running on Windows
272 ignoreerrors: Do not stop on download/postprocessing errors.
273 Can be 'only_download' to ignore only download errors.
274 Default is 'only_download' for CLI, but False for API
275 skip_playlist_after_errors: Number of allowed failures until the rest of
276 the playlist is skipped
277 allowed_extractors: List of regexes to match against extractor names that are allowed
278 overwrites: Overwrite all video and metadata files if True,
279 overwrite only non-video files if None
280 and don't overwrite any file if False
281 playlist_items: Specific indices of playlist to download.
282 playlistrandom: Download playlist items in random order.
283 lazy_playlist: Process playlist entries as they are received.
284 matchtitle: Download only matching titles.
285 rejecttitle: Reject downloads for matching titles.
286 logger: Log messages to a logging.Logger instance.
287 logtostderr: Print everything to stderr instead of stdout.
288 consoletitle: Display progress in the console window's titlebar.
289 writedescription: Write the video description to a .description file
290 writeinfojson: Write the video metadata to a .info.json file
291 clean_infojson: Remove internal metadata from the infojson
292 getcomments: Extract video comments. This will not be written to disk
293 unless writeinfojson is also given
294 writeannotations: Write the video annotations to a .annotations.xml file
295 writethumbnail: Write the thumbnail image to a file
296 allow_playlist_files: Whether to write playlists' description, infojson etc
297 also to disk when using the 'write*' options
298 write_all_thumbnails: Write all thumbnail formats to files
299 writelink: Write an internet shortcut file, depending on the
300 current platform (.url/.webloc/.desktop)
301 writeurllink: Write a Windows internet shortcut file (.url)
302 writewebloclink: Write a macOS internet shortcut file (.webloc)
303 writedesktoplink: Write a Linux internet shortcut file (.desktop)
304 writesubtitles: Write the video subtitles to a file
305 writeautomaticsub: Write the automatically generated subtitles to a file
306 listsubtitles: Lists all available subtitles for the video
307 subtitlesformat: The format code for subtitles
308 subtitleslangs: List of languages of the subtitles to download (can be regex).
309 The list may contain "all" to refer to all the available
310 subtitles. The language can be prefixed with a "-" to
311 exclude it from the requested languages, e.g. ['all', '-live_chat']
312 keepvideo: Keep the video file after post-processing
313 daterange: A utils.DateRange object, download only if the upload_date is in the range.
314 skip_download: Skip the actual download of the video file
315 cachedir: Location of the cache files in the filesystem.
316 False to disable filesystem cache.
317 noplaylist: Download single video instead of a playlist if in doubt.
318 age_limit: An integer representing the user's age in years.
319 Unsuitable videos for the given age are skipped.
320 min_views: An integer representing the minimum view count the video
321 must have in order to not be skipped.
322 Videos without view count information are always
323 downloaded. None for no limit.
324 max_views: An integer representing the maximum view count.
325 Videos that are more popular than that are not
326 downloaded.
327 Videos without view count information are always
328 downloaded. None for no limit.
329 download_archive: A set, or the name of a file where all downloads are recorded.
330 Videos already present in the file are not downloaded again.
331 break_on_existing: Stop the download process after attempting to download a
332 file that is in the archive.
333 break_per_url: Whether break_on_reject and break_on_existing
334 should act on each input URL as opposed to for the entire queue
335 cookiefile: File name or text stream from where cookies should be read and dumped to
336 cookiesfrombrowser: A tuple containing the name of the browser, the profile
337 name/path from where cookies are loaded, the name of the keyring,
338 and the container name, e.g. ('chrome', ) or
339 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
340 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
341 support RFC 5746 secure renegotiation
342 nocheckcertificate: Do not verify SSL certificates
343 client_certificate: Path to client certificate file in PEM format. May include the private key
344 client_certificate_key: Path to private key file for client certificate
345 client_certificate_password: Password for client certificate private key, if encrypted.
346 If not provided and the key is encrypted, yt-dlp will ask interactively
347 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
348 (Only supported by some extractors)
349 enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
350 http_headers: A dictionary of custom headers to be used for all requests
351 proxy: URL of the proxy server to use
352 geo_verification_proxy: URL of the proxy to use for IP address verification
353 on geo-restricted sites.
354 socket_timeout: Time to wait for unresponsive hosts, in seconds
355 bidi_workaround: Work around buggy terminals without bidirectional text
356 support, using fribidi
357 debug_printtraffic:Print out sent and received HTTP traffic
358 default_search: Prepend this string if an input url is not valid.
359 'auto' for elaborate guessing
360 encoding: Use this encoding instead of the system-specified.
361 extract_flat: Whether to resolve and process url_results further
362 * False: Always process. Default for API
363 * True: Never process
364 * 'in_playlist': Do not process inside playlist/multi_video
365 * 'discard': Always process, but don't return the result
366 from inside playlist/multi_video
367 * 'discard_in_playlist': Same as "discard", but only for
368 playlists (not multi_video). Default for CLI
369 wait_for_video: If given, wait for scheduled streams to become available.
370 The value should be a tuple containing the range
371 (min_secs, max_secs) to wait between retries
372 postprocessors: A list of dictionaries, each with an entry
373 * key: The name of the postprocessor. See
374 yt_dlp/postprocessor/__init__.py for a list.
375 * when: When to run the postprocessor. Allowed values are
376 the entries of utils.POSTPROCESS_WHEN
377 Assumed to be 'post_process' if not given
378 progress_hooks: A list of functions that get called on download
379 progress, with a dictionary with the entries
380 * status: One of "downloading", "error", or "finished".
381 Check this first and ignore unknown values.
382 * info_dict: The extracted info_dict
384 If status is one of "downloading" or "finished", the
385 following properties may also be present:
386 * filename: The final filename (always present)
387 * tmpfilename: The filename we're currently writing to
388 * downloaded_bytes: Bytes on disk
389 * total_bytes: Size of the whole file, None if unknown
390 * total_bytes_estimate: Guess of the eventual file size,
391 None if unavailable.
392 * elapsed: The number of seconds since download started.
393 * eta: The estimated time in seconds, None if unknown
394 * speed: The download speed in bytes/second, None if
395 unknown
396 * fragment_index: The counter of the currently
397 downloaded video fragment.
398 * fragment_count: The number of fragments (= individual
399 files that will be merged)
401 Progress hooks are guaranteed to be called at least once
402 (with status "finished") if the download is successful.
403 postprocessor_hooks: A list of functions that get called on postprocessing
404 progress, with a dictionary with the entries
405 * status: One of "started", "processing", or "finished".
406 Check this first and ignore unknown values.
407 * postprocessor: Name of the postprocessor
408 * info_dict: The extracted info_dict
410 Postprocessor hooks are guaranteed to be called at least twice
411 (with status "started" and "finished") if the processing is successful.
412 merge_output_format: "/" separated list of extensions to use when merging formats.
413 final_ext: Expected final extension; used to detect when the file was
414 already downloaded and converted
415 fixup: Automatically correct known faults of the file.
416 One of:
417 - "never": do nothing
418 - "warn": only emit a warning
419 - "detect_or_warn": check whether we can do anything
420 about it, warn otherwise (default)
421 source_address: Client-side IP address to bind to.
422 impersonate: Client to impersonate for requests.
423 An ImpersonateTarget (from yt_dlp.networking.impersonate)
424 sleep_interval_requests: Number of seconds to sleep between requests
425 during extraction
426 sleep_interval: Number of seconds to sleep before each download when
427 used alone or a lower bound of a range for randomized
428 sleep before each download (minimum possible number
429 of seconds to sleep) when used along with
430 max_sleep_interval.
431 max_sleep_interval:Upper bound of a range for randomized sleep before each
432 download (maximum possible number of seconds to sleep).
433 Must only be used along with sleep_interval.
434 Actual sleep time will be a random float from range
435 [sleep_interval; max_sleep_interval].
436 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
437 listformats: Print an overview of available video formats and exit.
438 list_thumbnails: Print a table of all thumbnails and exit.
439 match_filter: A function that gets called for every video with the signature
440 (info_dict, *, incomplete: bool) -> Optional[str]
441 For backward compatibility with youtube-dl, the signature
442 (info_dict) -> Optional[str] is also allowed.
443 - If it returns a message, the video is ignored.
444 - If it returns None, the video is downloaded.
445 - If it returns utils.NO_DEFAULT, the user is interactively
446 asked whether to download the video.
447 - Raise utils.DownloadCancelled(msg) to abort remaining
448 downloads when a video is rejected.
449 match_filter_func in utils/_utils.py is one example for this.
450 color: A Dictionary with output stream names as keys
451 and their respective color policy as values.
452 Can also just be a single color policy,
453 in which case it applies to all outputs.
454 Valid stream names are 'stdout' and 'stderr'.
455 Valid color policies are one of 'always', 'auto',
456 'no_color', 'never', 'auto-tty' or 'no_color-tty'.
457 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
458 HTTP header
459 geo_bypass_country:
460 Two-letter ISO 3166-1 alpha-2 country code that will be used for
461 explicit geographic restriction bypassing via faking
462 X-Forwarded-For HTTP header
463 geo_bypass_ip_block:
464 IP range in CIDR notation that will be used similarly to
465 geo_bypass_country
466 external_downloader: A dictionary of protocol keys and the executable of the
467 external downloader to use for it. The allowed protocols
468 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
469 Set the value to 'native' to use the native downloader
470 compat_opts: Compatibility options. See "Differences in default behavior".
471 The following options do not work when used through the API:
472 filename, abort-on-error, multistreams, no-live-chat,
473 format-sort, no-clean-infojson, no-playlist-metafiles,
474 no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
475 Refer to __init__.py for their implementation
476 progress_template: Dictionary of templates for progress outputs.
477 Allowed keys are 'download', 'postprocess',
478 'download-title' (console title) and 'postprocess-title'.
479 The template is mapped on a dictionary with keys 'progress' and 'info'
480 retry_sleep_functions: Dictionary of functions that take the number of attempts
481 as argument and returns the time to sleep in seconds.
482 Allowed keys are 'http', 'fragment', 'file_access'
483 download_ranges: A callback function that gets called for every video with
484 the signature (info_dict, ydl) -> Iterable[Section].
485 Only the returned sections will be downloaded.
486 Each Section is a dict with the following keys:
487 * start_time: Start time of the section in seconds
488 * end_time: End time of the section in seconds
489 * title: Section title (Optional)
490 * index: Section number (Optional)
491 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
492 noprogress: Do not print the progress bar
493 live_from_start: Whether to download livestream videos from the start
495 The following parameters are not used by YoutubeDL itself, they are used by
496 the downloader (see yt_dlp/downloader/common.py):
497 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
498 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
499 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
500 external_downloader_args, concurrent_fragment_downloads, progress_delta.
502 The following options are used by the post processors:
503 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
504 to the binary or its containing directory.
505 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
506 and a list of additional command-line arguments for the
507 postprocessor/executable. The dict can also have "PP+EXE" keys
508 which are used when the given exe is used by the given PP.
509 Use 'default' as the name for arguments to be passed to all PP
510 For compatibility with youtube-dl, a single list of args
511 can also be used
513 The following options are used by the extractors:
514 extractor_retries: Number of times to retry for known errors (default: 3)
515 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
516 hls_split_discontinuity: Split HLS playlists into different formats at
517 discontinuities such as ad breaks (default: False)
518 extractor_args: A dictionary of arguments to be passed to the extractors.
519 See "EXTRACTOR ARGUMENTS" for details.
520 E.g. {'youtube': {'skip': ['dash', 'hls']}}
521 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
523 The following options are deprecated and may be removed in the future:
525 break_on_reject: Stop the download process when encountering a video that
526 has been filtered out.
527 - `raise DownloadCancelled(msg)` in match_filter instead
528 force_generic_extractor: Force downloader to use the generic extractor
529 - Use allowed_extractors = ['generic', 'default']
530 playliststart: - Use playlist_items
531 Playlist item to start at.
532 playlistend: - Use playlist_items
533 Playlist item to end at.
534 playlistreverse: - Use playlist_items
535 Download playlist items in reverse order.
536 forceurl: - Use forceprint
537 Force printing final URL.
538 forcetitle: - Use forceprint
539 Force printing title.
540 forceid: - Use forceprint
541 Force printing ID.
542 forcethumbnail: - Use forceprint
543 Force printing thumbnail URL.
544 forcedescription: - Use forceprint
545 Force printing description.
546 forcefilename: - Use forceprint
547 Force printing final filename.
548 forceduration: - Use forceprint
549 Force printing duration.
550 allsubtitles: - Use subtitleslangs = ['all']
551 Downloads all the subtitles of the video
552 (requires writesubtitles or writeautomaticsub)
553 include_ads: - Doesn't work
554 Download ads as well
555 call_home: - Not implemented
556 Boolean, true if we are allowed to contact the
557 yt-dlp servers for debugging.
558 post_hooks: - Register a custom postprocessor
559 A list of functions that get called as the final step
560 for each video file, after all postprocessors have been
561 called. The filename will be passed as the only argument.
562 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
563 Use the native HLS downloader instead of ffmpeg/avconv
564 if True, otherwise use ffmpeg/avconv if False, otherwise
565 use downloader suggested by extractor if None.
566 prefer_ffmpeg: - avconv support is deprecated
567 If False, use avconv instead of ffmpeg if both are available,
568 otherwise prefer ffmpeg.
569 youtube_include_dash_manifest: - Use extractor_args
570 If True (default), DASH manifests and related
571 data will be downloaded and processed by extractor.
572 You can reduce network I/O by disabling it if you don't
573 care about DASH. (only for youtube)
574 youtube_include_hls_manifest: - Use extractor_args
575 If True (default), HLS manifests and related
576 data will be downloaded and processed by extractor.
577 You can reduce network I/O by disabling it if you don't
578 care about HLS. (only for youtube)
579 no_color: Same as `color='no_color'`
580 no_overwrites: Same as `overwrites=False`
583 _NUMERIC_FIELDS = {
584 'width', 'height', 'asr', 'audio_channels', 'fps',
585 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
586 'timestamp', 'release_timestamp',
587 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
588 'average_rating', 'comment_count', 'age_limit',
589 'start_time', 'end_time',
590 'chapter_number', 'season_number', 'episode_number',
591 'track_number', 'disc_number', 'release_year',
594 _format_fields = {
595 # NB: Keep in sync with the docstring of extractor/common.py
596 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
597 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
598 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
599 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
600 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
601 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
602 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
603 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
605 _deprecated_multivalue_fields = {
606 'album_artist': 'album_artists',
607 'artist': 'artists',
608 'composer': 'composers',
609 'creator': 'creators',
610 'genre': 'genres',
612 _format_selection_exts = {
613 'audio': set(MEDIA_EXTENSIONS.common_audio),
614 'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
615 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
618 def __init__(self, params=None, auto_init=True):
619 """Create a YoutubeDL object with the given options.
620 @param auto_init Whether to load the default extractors and print header (if verbose).
621 Set to 'no_verbose_header' to not print the header
623 if params is None:
624 params = {}
625 self.params = params
626 self._ies = {}
627 self._ies_instances = {}
628 self._pps = {k: [] for k in POSTPROCESS_WHEN}
629 self._printed_messages = set()
630 self._first_webpage_request = True
631 self._post_hooks = []
632 self._progress_hooks = []
633 self._postprocessor_hooks = []
634 self._download_retcode = 0
635 self._num_downloads = 0
636 self._num_videos = 0
637 self._playlist_level = 0
638 self._playlist_urls = set()
639 self.cache = Cache(self)
640 self.__header_cookies = []
642 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
643 self._out_files = Namespace(
644 out=stdout,
645 error=sys.stderr,
646 screen=sys.stderr if self.params.get('quiet') else stdout,
647 console=None if os.name == 'nt' else next(
648 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
651 try:
652 windows_enable_vt_mode()
653 except Exception as e:
654 self.write_debug(f'Failed to enable VT mode: {e}')
656 if self.params.get('no_color'):
657 if self.params.get('color') is not None:
658 self.params.setdefault('_warnings', []).append(
659 'Overwriting params from "color" with "no_color"')
660 self.params['color'] = 'no_color'
662 term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
663 base_no_color = bool(os.getenv('NO_COLOR'))
def process_color_policy(stream):
    """Work out the colour setting to use for one output stream.

    @param stream  Either sys.stdout or sys.stderr (anything else raises KeyError)
    @returns       True, False, or the string 'no_color'
    """
    names_by_stream = {sys.stdout: 'stdout', sys.stderr: 'stderr'}
    stream_name = names_by_stream[stream]
    # Query the 'color' param both under this stream's name and directly,
    # keeping only string results; fall back to 'auto' when nothing is found
    configured = traverse_obj(self.params, ('color', (stream_name, None), {str}, any))
    policy = configured or 'auto'

    # Explicit policies do not depend on the terminal at all
    if policy not in ('auto', 'auto-tty', 'no_color-tty'):
        assert policy in ('always', 'never', 'no_color'), policy
        if policy == 'always':
            return True
        if policy == 'never':
            return False
        return policy

    # The '*-tty' policies take no_color from the policy name itself;
    # plain 'auto' uses the enclosing scope's base_no_color instead
    if policy.endswith('tty'):
        no_color = policy.startswith('no_color')
    else:
        no_color = base_no_color

    if not (term_allow_color and supports_terminal_sequences(stream)):
        return False
    return 'no_color' if no_color else True
678 self._allow_colors = Namespace(**{
679 name: process_color_policy(stream)
680 for name, stream in self._out_files.items_ if name != 'console'
683 system_deprecation = _get_system_deprecation()
684 if system_deprecation:
685 self.deprecated_feature(system_deprecation.replace('\n', '\n '))
687 if self.params.get('allow_unplayable_formats'):
688 self.report_warning(
689 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
690 'This is a developer option intended for debugging. \n'
691 ' If you experience any issues while using this option, '
692 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
694 if self.params.get('bidi_workaround', False):
695 try:
696 import pty
697 master, slave = pty.openpty()
698 width = shutil.get_terminal_size().columns
699 width_args = [] if width is None else ['-w', str(width)]
700 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
701 try:
702 self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
703 except OSError:
704 self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
705 self._output_channel = os.fdopen(master, 'rb')
706 except OSError as ose:
707 if ose.errno == errno.ENOENT:
708 self.report_warning(
709 'Could not find fribidi executable, ignoring --bidi-workaround. '
710 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
711 else:
712 raise
714 self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
715 self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
716 self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
717 self.params['http_headers'].pop('Cookie', None)
719 if auto_init and auto_init != 'no_verbose_header':
720 self.print_debug_header()
def check_deprecated(param, option, suggestion):
    """Warn if a deprecated param is set and report whether it was.

    @param param       Key looked up in self.params
    @param option      Name of the deprecated option, as shown in the warning
    @param suggestion  Replacement shown in the warning
    @returns           True if the param is present and not None, else False
    """
    is_set = self.params.get(param) is not None
    if is_set:
        self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
    return is_set
728 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
729 if self.params.get('geo_verification_proxy') is None:
730 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
732 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
733 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
734 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
736 for msg in self.params.get('_warnings', []):
737 self.report_warning(msg)
738 for msg in self.params.get('_deprecation_warnings', []):
739 self.deprecated_feature(msg)
741 if impersonate_target := self.params.get('impersonate'):
742 if not self._impersonate_target_available(impersonate_target):
743 raise YoutubeDLError(
744 f'Impersonate target "{impersonate_target}" is not available. '
745 f'Use --list-impersonate-targets to see available targets. '
746 f'You may be missing dependencies required to support this target.')
748 if 'list-formats' in self.params['compat_opts']:
749 self.params['listformats_table'] = False
751 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
752 # nooverwrites was unnecessarily changed to overwrites
753 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
754 # This ensures compatibility with both keys
755 self.params['overwrites'] = not self.params['nooverwrites']
756 elif self.params.get('overwrites') is None:
757 self.params.pop('overwrites', None)
758 else:
759 self.params['nooverwrites'] = not self.params['overwrites']
761 if self.params.get('simulate') is None and any((
762 self.params.get('list_thumbnails'),
763 self.params.get('listformats'),
764 self.params.get('listsubtitles'),
766 self.params['simulate'] = 'list_only'
768 self.params.setdefault('forceprint', {})
769 self.params.setdefault('print_to_file', {})
771 # Compatibility with older syntax
772 if not isinstance(params['forceprint'], dict):
773 self.params['forceprint'] = {'video': params['forceprint']}
775 if auto_init:
776 self.add_default_info_extractors()
778 if (sys.platform != 'win32'
779 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
780 and not self.params.get('restrictfilenames', False)):
781 # Unicode filesystem API will throw errors (#1474, #13027)
782 self.report_warning(
783 'Assuming --restrict-filenames since file system encoding '
784 'cannot encode all characters. '
785 'Set the LC_ALL environment variable to fix this.')
786 self.params['restrictfilenames'] = True
788 self._parse_outtmpl()
790 # Creating format selector here allows us to catch syntax errors before the extraction
791 self.format_selector = (
792 self.params.get('format') if self.params.get('format') in (None, '-')
793 else self.params['format'] if callable(self.params['format'])
794 else self.build_format_selector(self.params['format']))
796 hooks = {
797 'post_hooks': self.add_post_hook,
798 'progress_hooks': self.add_progress_hook,
799 'postprocessor_hooks': self.add_postprocessor_hook,
801 for opt, fn in hooks.items():
802 for ph in self.params.get(opt, []):
803 fn(ph)
805 for pp_def_raw in self.params.get('postprocessors', []):
806 pp_def = dict(pp_def_raw)
807 when = pp_def.pop('when', 'post_process')
808 self.add_post_processor(
809 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
810 when=when)
812 def preload_download_archive(fn):
813 """Preload the archive, if any is specified"""
814 archive = set()
815 if fn is None:
816 return archive
817 elif not is_path_like(fn):
818 return fn
820 self.write_debug(f'Loading archive file {fn!r}')
821 try:
822 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
823 for line in archive_file:
824 archive.add(line.strip())
825 except OSError as ioe:
826 if ioe.errno != errno.ENOENT:
827 raise
828 return archive
830 self.archive = preload_download_archive(self.params.get('download_archive'))
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a video ID that starts with a dash.

    Such an argument (a dash followed by exactly 10 ID characters) may have
    been meant as a URL/ID rather than as options. The warning suggests a
    command line with those arguments moved after a `--` separator.

    @param argv    List of command-line argument strings
    """
    dash_id_re = r'^-[0-9A-Za-z_-]{10}$'
    regular_args, dashed_ids = [], []
    for arg in argv:
        bucket = dashed_ids if re.match(dash_id_re, arg) else regular_args
        bucket.append(arg)
    if not dashed_ids:
        return

    correct_argv = ['yt-dlp', *regular_args, '--', *dashed_ids]
    self.report_warning(
        'Long argument string detected. '
        f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    `ie` may be an extractor class or an instance. Only instances are also
    stored in `_ies_instances` and bound to this downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A class is registered by key only
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key.

    An instance already present in `_ies_instances` is reused; otherwise a
    new one is created, added via add_info_extractor() and returned.
    """
    existing = self._ies_instances.get(ie_key)
    if existing is not None:
        return existing
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractor_classes to the end of the list

    The `allowed_extractors` param (default `['default']`) selects which
    extractors are registered; its entries are resolved with regex support.

    @raises ValueError    If an `allowed_extractors` entry is an invalid regex
    """
    all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
    all_ies['end'] = UnsupportedURLIE()
    try:
        ie_names = orderedSet_from_options(
            self.params.get('allowed_extractors', ['default']), {
                'all': list(all_ies),
                'default': [name for name, ie in all_ies.items() if ie._ENABLED],
            }, use_regex=True)
    except re.error as e:
        # Chain explicitly so the original regex error is kept as the cause
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}') from e
    for name in ie_names:
        self.add_info_extractor(all_ies[name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')
def add_post_processor(self, pp, when='post_process'):
    """Add a PostProcessor object to the end of the chain.

    @param when    Stage whose chain receives `pp`; must be in POSTPROCESS_WHEN
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)
def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks"""
    registered = self._progress_hooks
    registered.append(ph)
def add_postprocessor_hook(self, ph):
    """Add the postprocessing progress hook

    The hook is stored and also attached to every postprocessor that is
    already registered in any stage.
    """
    self._postprocessor_hooks.append(ph)
    for registered_pp in itertools.chain.from_iterable(self._pps.values()):
        registered_pp.add_progress_hook(ph)
def _bidi_workaround(self, message):
    """Pass `message` through the bidi helper process, if one was set up.

    Without an `_output_channel` attribute the message is returned as is.
    Otherwise it is written to the helper's stdin and one output line is
    read back per input line; the final newline is stripped.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, str)
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write(f'{message}\n'.encode())
    helper_stdin.flush()
    converted = [self._output_channel.readline().decode() for _ in range(expected_lines)]
    # Drop the trailing newline added above
    return ''.join(converted)[:-1]
def _write_string(self, message, out=None, only_once=False):
    """Write `message` to `out` using the configured `encoding` param.

    @param only_once    Skip messages already recorded in `_printed_messages`
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout

    `skip_eol` and `quiet` are no longer honoured; passing either one only
    triggers a deprecation warning.
    """
    deprecated_args = (
        ('quiet', quiet is not None),
        ('skip_eol', skip_eol is not False),
    )
    for arg_name, was_passed in deprecated_args:
        if was_passed:
            self.deprecation_warning(
                f'"YoutubeDL.to_stdout" no longer accepts the argument {arg_name}. '
                'Use "YoutubeDL.to_screen" instead')
    line = '{}\n'.format(self._bidi_workaround(message))
    self._write_string(line, self._out_files.out)
def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to screen if not in quiet mode

    A configured `logger` receives the message at debug level instead.
    An explicit `quiet` argument overrides the `quiet` param; `verbose`
    always forces output.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    be_quiet = self.params.get('quiet') if quiet is None else quiet
    if be_quiet and not self.params.get('verbose'):
        return
    eol = '' if skip_eol else '\n'
    self._write_string(
        f'{self._bidi_workaround(message)}{eol}',
        self._out_files.screen, only_once=only_once)
def to_stderr(self, message, only_once=False):
    """Print message to stderr

    A configured `logger` receives the message at error level instead.
    """
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)
def _send_console_code(self, code):
    """Write a control sequence to the console handle.

    Nothing is written when `os.name` is 'nt' or when no console handle is
    available.
    """
    if os.name == 'nt':
        return
    console = self._out_files.console
    if console:
        self._write_string(code, console)
def to_console_title(self, message):
    """Set the console window title to `message`.

    Does nothing unless the `consoletitle` param is enabled. Terminal
    sequences are removed from the message first. On Windows the title is
    set through the kernel32 API; elsewhere an escape sequence is sent to
    the console.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if os.name == 'nt':
        # Imported locally: ctypes is not among this module's top-level
        # imports, so the bare name would raise NameError here
        import ctypes

        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    else:
        self._send_console_code(f'\033]0;{message}\007')
def save_console_title(self):
    """Push the current console title, unless disabled or simulating."""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        # Save the title on stack
        self._send_console_code('\033[22;0t')
def restore_console_title(self):
    """Pop the previously saved console title, unless disabled or simulating."""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        # Restore the title from stack
        self._send_console_code('\033[23;0t')
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    context_value = self
    return context_value
def save_cookies(self):
    """Save the cookie jar when a `cookiefile` param is configured."""
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
def __exit__(self, *args):
    """Leave the context: restore the console title, then close.

    The exception details in `args` are ignored and None is returned.
    """
    for finalizer in (self.restore_console_title, self.close):
        finalizer()
def close(self):
    """Save cookies and close the request director if one was created."""
    self.save_cookies()
    # Check the instance dict directly instead of accessing the attribute
    if '_request_director' not in vars(self):
        return
    self._request_director.close()
    del self._request_director
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well. Then, unless `ignoreerrors` is set, a
    DownloadError is raised; otherwise the return code is set to 1.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """
    if message is not None:
        self.to_stderr(message)

    active_exc = sys.exc_info()
    exc_value = active_exc[1]

    if self.params.get('verbose'):
        if tb is None:
            if active_exc[0]:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    pieces.append(''.join(traceback.format_exception(*exc_value.exc_info)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    # Prefer the exc_info carried by the active exception, when it has one
    if active_exc[0] and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
        exc_info = exc_value.exc_info
    else:
        exc_info = active_exc
    raise DownloadError(message, exc_info)
# Named text styles, passed as the style argument to the _format_* helpers
# (e.g. self.Styles.WARNING for the "WARNING:" prefix)
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    BAD_FORMAT='light red',
    WARNING='yellow',
    SUPPRESS='light black',
)
def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
    """Apply style `f` to `text` when colors are allowed for `handle`.

    @param fallback         Returned instead of the text when colors are not
                            allowed, or (with test_encoding) when the text
                            cannot be fully encoded
    @param test_encoding    Round-trip the text through the output encoding,
                            dropping characters that cannot be encoded
    """
    text = str(text)
    if test_encoding:
        untouched = text
        # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
        encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
        text = text.encode(encoding, 'ignore').decode(encoding)
        if fallback is not None and text != untouched:
            text = fallback
    if allow_colors is True:
        return format_text(text, f)
    if fallback is None:
        return text
    return fallback
def _format_out(self, *args, **kwargs):
    """Call _format_text with the `out` handle and its color setting."""
    handle, allow_colors = self._out_files.out, self._allow_colors.out
    return self._format_text(handle, allow_colors, *args, **kwargs)
def _format_screen(self, *args, **kwargs):
    """Call _format_text with the `screen` handle and its color setting."""
    handle, allow_colors = self._out_files.screen, self._allow_colors.screen
    return self._format_text(handle, allow_colors, *args, **kwargs)
def _format_err(self, *args, **kwargs):
    """Call _format_text with the `error` handle and its color setting."""
    handle, allow_colors = self._out_files.error, self._allow_colors.error
    return self._format_text(handle, allow_colors, *args, **kwargs)
def report_warning(self, message, only_once=False):
    """
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    A configured `logger` receives the bare message at warning level
    instead; otherwise the `no_warnings` param suppresses the output.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err('WARNING:', self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
def deprecation_warning(self, message, *, stacklevel=0):
    """Forward to the module-level deprecation_warning helper.

    The helper is given report_error as printer, with is_error=False, and
    a stacklevel one higher than the one passed in.
    """
    caller_level = stacklevel + 1
    deprecation_warning(
        message, stacklevel=caller_level, printer=self.report_error, is_error=False)
def deprecated_feature(self, message):
    """Report the use of a deprecated feature.

    A configured `logger` receives the message once, at warning level,
    prefixed with 'Deprecated Feature:'. Otherwise the message is printed
    to stderr with a styled prefix, at most once per distinct message.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
        # Stop here: to_stderr() would send the same message to the logger
        # a second time, at error level
        return
    self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
    """
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.

    Extra positional and keyword arguments are passed on to trouble().
    """
    prefix = self._format_err('ERROR:', self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
def write_debug(self, message, only_once=False):
    """Log debug message or Print message to stderr

    Nothing happens unless the `verbose` param is set. The message gets a
    '[debug] ' prefix either way.
    """
    if not self.params.get('verbose', False):
        return
    line = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self.to_stderr(line, only_once)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    announce = self.to_screen
    try:
        announce(f'[download] {file_name} has already been downloaded')
    except UnicodeEncodeError:
        announce('[download] The file has already been downloaded')
def report_file_delete(self, file_name):
    """Report that existing file will be deleted.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    announce = self.to_screen
    try:
        announce(f'Deleting existing file {file_name}')
    except UnicodeEncodeError:
        announce('Deleting existing file')
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise or warn that no formats are available for `info`.

    Only a warning is issued when the `ignore_no_formats_error` param is
    set and `forced` is false; otherwise an ExtractorError is raised.

    @param forced    Raise even if the error is configured to be ignored
    @param msg       Custom message; defaults depend on info['_has_drm']
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)
def parse_outtmpl(self):
    """Deprecated: normalize the `outtmpl` param and return it."""
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    parsed = self.params['outtmpl']
    return parsed
def _parse_outtmpl(self):
    """Normalize the `outtmpl` param into a dict with defaults filled in.

    A non-dict value becomes the 'default' template. Every template type
    from DEFAULT_OUTTMPL that is missing or None gets its default value.
    """
    if self.params.get('restrictfilenames'):
        def sanitize(tmpl):
            # Remove spaces in the default template
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    templates = self.params.setdefault('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
        self.params['outtmpl'] = templates
    for tmpl_type, default_tmpl in DEFAULT_OUTTMPL.items():
        if templates.get(tmpl_type) is None:
            templates[tmpl_type] = sanitize(default_tmpl)
def get_output_path(self, dir_type='', filename=None):
    """Build the output path from the `paths` param.

    The expanded 'home' path, the expanded path for `dir_type` (if given)
    and `filename` are joined, and the result is passed to sanitize_path.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path on an output template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choices(string.ascii_letters, k=32))
    for char in ('%', '$'):
        outtmpl = outtmpl.replace(char * 2, f'{char}{sep}{char}')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(outtmpl)
    return expanded.replace(sep, '')
@staticmethod
def escape_outtmpl(outtmpl):
    """ Escape any remaining strings like %s, %abc% etc. """
    def _escape(mobj):
        # Matches without a key get an extra '%' in front
        extra = '' if mobj.group('has_key') else '%'
        return extra + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)
@classmethod
def validate_outtmpl(cls, outtmpl):
    """ @return None or Exception object """
    def _as_string_conversion(mobj):
        # Swap the conversion type of the match for plain 's'
        return f'{mobj.group(0)[:-1]}s'

    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')
    outtmpl = re.sub(custom_types_re, _as_string_conversion, cls._outtmpl_expandpath(outtmpl))
    try:
        # Trial substitution; only whether it raises matters
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1188 @staticmethod
1189 def _copy_infodict(info_dict):
1190 info_dict = dict(info_dict)
1191 info_dict.pop('__postprocessors', None)
1192 info_dict.pop('__pending_error', None)
1193 return info_dict
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize Whether to sanitize the output as a filename

    Note: 'epoch' is set on the passed-in info_dict if missing; all other
    additions are made to a copy.
    @return A tuple of the rewritten template and the dict of values to
            substitute into it
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    # Everything below works on a copy, so the caller's dict is not modified further
    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Filled by create_key(): maps each generated template key to its computed value
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
        '*': float.__mul__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}  # noqa: UP031
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {  # noqa: UP031
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
    # Parses the text inside "%(...)": optional negation, field path, maths,
    # ">strftime format", then ",alternate", "&replacement" and "|default"
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _from_user_input(field):
        """Convert one path component: ':' -> Ellipsis, 'a:b[:c]' -> slice, digits -> int"""
        if field == ':':
            return ...
        elif ':' in field:
            return slice(*map(int_or_none, field.split(':')))
        elif int_or_none(field) is not None:
            return int(field)
        return field

    def _traverse_infodict(fields):
        """Split a dotted field path (with optional "{...}" groups) and look it up in info_dict"""
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty first/last component left over from splitting
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if not f.startswith('{'):
                fields[i] = _from_user_input(f)
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, traverse_string=True)

    def get_value(mdict):
        """Compute the value for one parsed field expression (an INTERNAL_FORMAT_RE groupdict)"""
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # NOTE: this local name shadows the imported `operator` module within get_value
            operator = None
            # Consume offset_key as alternating operator / operand tokens
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a numeric literal, so treat the operand as a field path
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    # Placeholder used when a field has no value and no explicit default
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted):
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # Resolve `sanitize` into either a falsy value or a (key, value) callable
    if callable(sanitize):
        self.deprecation_warning('Passing a callable "sanitize" to YoutubeDL.prepare_outtmpl is deprecated')
    elif not sanitize:
        pass
    elif (sys.platform != 'win32' and not self.params.get('restrictfilenames')
            and self.params.get('windowsfilenames') is False):
        def sanitize(key, value):
            return value.replace('/', '\u29F8').replace('\0', '')
    else:
        def sanitize(key, value):
            return filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames'))

    def _dumpjson_default(obj):
        """json.dumps fallback: sets and LazyLists become lists, anything else its repr"""
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Only all-digit field names are accepted; each resolves to the first positional argument
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return args[0], -1
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        """re.sub callback: compute the value for one template field and return its replacement"""
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        value, replacement, default, last_field = None, None, na, ''
        # Try each ","-separated alternate in turn until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            last_field, replacement = mobj['fields'], mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                value, default = None, na

        fmt = outer_mobj.group('format')
        if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = shell_quote(value, shell=True), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we manually convert it before sanitizing
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitize(last_field, value)

        # Build the TMPL_DICT key from the field text and the original format,
        # with NUL characters inserted after each '%' and between the two parts
        key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate `outtmpl` against `info_dict` and return the resulting string.

    Extra arguments are forwarded to prepare_outtmpl().
    """
    prepared_tmpl, tmpl_values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % tmpl_values
@_catch_unsafe_extension_error
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
    """Evaluate an output template into a filename, without output directories.

    @param outtmpl      Explicit template; mutually exclusive with tmpl_type
    @param tmpl_type    Key of the template to use from the `outtmpl` param
    @return             The filename, or None if it is empty or the template
                        is invalid (after report_error is called)
    """
    assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
    if outtmpl is None:
        templates = self.params['outtmpl']
        outtmpl = templates.get(tmpl_type or 'default', templates['default'])
    try:
        expanded = self._outtmpl_expandpath(outtmpl)
        filename = self.evaluate_outtmpl(expanded, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('', 'temp'):
            final_ext = self.params.get('final_ext')
            ext = info_dict.get('ext')
            if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)
        elif tmpl_type:
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Only the part before the last two dot-separated pieces is trimmed
            stem, *suffixes = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:trim_file_name], *suffixes, delim='.')

        return filename
    except ValueError as err:
        self.report_error(f'Error in output template: {err} (encoding: {preferredencoding()!r})')
        return None
def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param dir_type    Template/path type to use; mutually exclusive with outtmpl
    @param outtmpl     Explicit output template
    @param warn        Warn when the `paths` param will be ignored
    @return            The full output path; '-' or a falsy filename is
                       returned without a path being applied
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    # `filename` can be None here (empty or invalid template); skip the
    # checks then, since os.path.isabs(None) raises TypeError
    if warn and filename and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded

    Otherwise returns the reason (a string) for skipping the entry.

    @param incomplete    Passed on to the `match_filter` callable
    @param silent        Do not print the skip reason to screen
    @raises              The exception matching the applicable break option
                         (break_on_existing, break_on_reject or match_filter)
                         when the entry is skipped and that param is set
    """
    _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        """Apply the configured filters; return a skip reason, or None to accept the entry"""
        if _type in ('playlist', 'multi_video'):
            return
        elif _type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            return

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return f'Skipping "{video_title}" because it is age restricted'

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        cancelled = None
        try:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                ret = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            # Only a cancellation carrying NO_DEFAULT as message is handled here (via the prompt below)
            if err.msg is not NO_DEFAULT:
                raise
            ret, cancelled = err.msg, err

        if ret is NO_DEFAULT:
            # Ask the user interactively; repeat until the reply is y, n or empty
            while True:
                filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                reply = input(self._format_screen(
                    f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                if reply in {'y', ''}:
                    return None
                elif reply == 'n':
                    if cancelled:
                        raise type(cancelled)(f'Skipping {video_title}')
                    return f'Skipping {video_title}'
        return ret

    # Entries already in the download archive are skipped without running the filters
    if self.in_download_archive(info_dict):
        reason = ''.join((
            format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
            format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
            'has already been recorded in the archive'))
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = check_filter()
        except DownloadCancelled as e:
            reason, break_opt, break_err = e.msg, 'match_filter', type(e)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
1573 @staticmethod
1574 def add_extra_info(info_dict, extra_info):
1575 """Set the keys from extra_info in info dict if they are missing"""
1576 for key, value in extra_info.items():
1577 info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')

    Returns None when the URL is already in the download archive or when
    no suitable extractor is found (after reporting the error).
    """
    if extra_info is None:
        extra_info = {}

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # Narrow the candidates down to the requested extractor, if one was named
    if not ie_key:
        candidates = self._ies
    elif ie_key in self._ies:
        candidates = {ie_key: self._ies[ie_key]}
    else:
        candidates = {}

    for key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        already_archived = temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key})
        if not already_archived:
            # Only the first suitable extractor is ever tried
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

        self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                       'has already been recorded in the archive')
        if self.params.get('break_on_existing', False):
            raise ExistingVideoReached
        return None

    # No candidate accepted the URL
    extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
    self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                      tb=False if extractors_restricted else None)
1629 def _handle_extraction_exceptions(func):
1630 @functools.wraps(func)
1631 def wrapper(self, *args, **kwargs):
1632 while True:
1633 try:
1634 return func(self, *args, **kwargs)
1635 except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1636 raise
1637 except ReExtractInfo as e:
1638 if e.expected:
1639 self.to_screen(f'{e}; Re-extracting data')
1640 else:
1641 self.to_stderr('\r')
1642 self.report_warning(f'{e}; Re-extracting data')
1643 continue
1644 except GeoRestrictedError as e:
1645 msg = e.msg
1646 if e.countries:
1647 msg += '\nThis video is available in {}.'.format(', '.join(
1648 map(ISO3166Utils.short2full, e.countries)))
1649 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1650 self.report_error(msg)
1651 except ExtractorError as e: # An error we somewhat expected
1652 self.report_error(str(e), e.format_traceback())
1653 except Exception as e:
1654 if self.params.get('ignoreerrors'):
1655 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1656 else:
1657 raise
1658 break
1659 return wrapper
1661 def _wait_for_video(self, ie_result={}):
1662 if (not self.params.get('wait_for_video')
1663 or ie_result.get('_type', 'video') != 'video'
1664 or ie_result.get('formats') or ie_result.get('url')):
1665 return
1667 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1668 last_msg = ''
1670 def progress(msg):
1671 nonlocal last_msg
1672 full_msg = f'{msg}\n'
1673 if not self.params.get('noprogress'):
1674 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1675 elif last_msg:
1676 return
1677 self.to_screen(full_msg, skip_eol=True)
1678 last_msg = msg
1680 min_wait, max_wait = self.params.get('wait_for_video')
1681 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1682 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1683 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1684 self.report_warning('Release time of video is not known')
1685 elif ie_result and (diff or 0) <= 0:
1686 self.report_warning('Video should already be available according to extracted info')
1687 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1688 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1690 wait_till = time.time() + diff
1691 try:
1692 while True:
1693 diff = wait_till - time.time()
1694 if diff <= 0:
1695 progress('')
1696 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1697 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1698 time.sleep(1)
1699 except KeyboardInterrupt:
1700 progress('')
1701 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1702 except BaseException as e:
1703 if not isinstance(e, ReExtractInfo):
1704 self.to_screen('')
1705 raise
def _load_cookies(self, data, *, autoscope=True):
    """Loads cookies from a `Cookie` header

    This tries to work around the security vulnerability of passing cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header as string to load the cookies from
    @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                        If `True`, save cookies for later to be stored in the jar with a limited scope
                        If a URL, save cookies in the jar with the domain of the URL
    """
    for morsel in LenientSimpleCookie(data).values():
        # Attributes (domain, path, ...) are only accepted in Set-Cookie syntax, i.e. without autoscope
        if autoscope and any(morsel.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = morsel.get('domain') or ''
        raw_expiry = morsel.get('expires')
        expiry = None if raw_expiry == '' else raw_expiry  # 0 is valid
        prepared_cookie = http.cookiejar.Cookie(
            version=morsel.get('version') or 0,
            name=morsel.key,
            value=morsel.value,
            port=None,
            port_specified=False,
            domain=domain,
            domain_specified=True,
            domain_initial_dot=True,
            path=morsel.get('path') or '',
            path_specified=bool(morsel.get('path')),
            secure=morsel.get('secure') or False,
            expires=expiry,
            discard=False,
            comment=None,
            comment_url=None,
            rest={})

        if domain:
            self.cookiejar.set_cookie(prepared_cookie)
            continue

        if autoscope is True:
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
        elif autoscope:
            # autoscope is a URL here: scope the cookie to that URL's domain
            self.report_warning(
                'The extractor result contains an unscoped cookie as an HTTP header. '
                f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                only_once=True)
            self._apply_header_cookies(autoscope, [prepared_cookie])
        else:
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)
1749 def _apply_header_cookies(self, url, cookies=None):
1750 """Applies stray header cookies to the provided url
1752 This loads header cookies and scopes them to the domain provided in `url`.
1753 While this is not ideal, it helps reduce the risk of them being sent
1754 to an unintended destination while mostly maintaining compatibility.
1756 parsed = urllib.parse.urlparse(url)
1757 if not parsed.hostname:
1758 return
1760 for cookie in map(copy.copy, cookies or self.__header_cookies):
1761 cookie.domain = f'.{parsed.hostname}'
1762 self.cookiejar.set_cookie(cookie)
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run the extractor `ie` on `url` and, if `process` is set, process its result

    Returns the unprocessed extractor result when `process` is false,
    and None when the extractor returned nothing.
    """
    self._apply_header_cookies(url)

    try:
        ie_result = ie.extract(url)
    except UserNotLive as e:
        if process:
            if self.params.get('wait_for_video'):
                self.report_warning(e)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the URL and extractor fields of ie_result that are still missing

    @param ie_result    Info dict to complete in place
    @param ie           Extractor providing `extractor`/`extractor_key`, or None to skip them
    @param url          Value for `webpage_url`/`original_url`, or None to skip them
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
        }
        self.add_extra_info(ie_result, url_fields)

    # Read back only now: the webpage_url may just have been set from `url`
    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        derived_fields = {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        }
        self.add_extra_info(ie_result, derived_fields)

    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result    Info dict returned by an extractor; dispatched on its `_type`
                        ('video' when missing)
    @param download     Whether to download the resolved videos
    @param extra_info   Extra values to add to the info where they are missing
    @raises Exception   if `_type` is not a known result type
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: the reference is reported as-is instead of being resolved
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The fallback to the id must happen before formatting:
            # a formatted string is always truthy, so an `or` after it never applies
            self.to_screen(
                '[download] Skipping already downloaded playlist: {}'.format(
                    ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor {} returned a compat_list result. '
            'It needs to be updated.'.format(ie_result.get('extractor')))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception(f'Invalid result type: {result_type}')
def _ensure_dir_exists(self, path):
    """Call make_dir for `path`, handing it self.report_error, and return its result

    Callers in this file treat a falsy result as failure.
    """
    outcome = make_dir(path, self.report_error)
    return outcome
1949 @staticmethod
1950 def _playlist_infodict(ie_result, strict=False, **kwargs):
1951 info = {
1952 'playlist_count': ie_result.get('playlist_count'),
1953 'playlist': ie_result.get('title') or ie_result.get('id'),
1954 'playlist_id': ie_result.get('id'),
1955 'playlist_title': ie_result.get('title'),
1956 'playlist_uploader': ie_result.get('uploader'),
1957 'playlist_uploader_id': ie_result.get('uploader_id'),
1958 'playlist_channel': ie_result.get('channel'),
1959 'playlist_channel_id': ie_result.get('channel_id'),
1960 'playlist_webpage_url': ie_result.get('webpage_url'),
1961 **kwargs,
1963 if strict:
1964 return info
1965 if ie_result.get('webpage_url'):
1966 info.update({
1967 'webpage_url': ie_result['webpage_url'],
1968 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1969 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1971 return {
1972 **info,
1973 'playlist_index': 0,
1974 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1975 'extractor': ie_result['extractor'],
1976 'extractor_key': ie_result['extractor_key'],
def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    @param ie_result    A 'playlist' or 'multi_video' result; updated in place
    @param download     Passed on to the processing of every entry
    @returns            The result of the 'playlist' postprocessors, or None if the
                        playlist was rejected by _match_entry or one of the
                        playlist files could not be written
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    # The playlist as a whole is matched first, using only its playlist-level fields
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are resolved one by one inside the loop below, so the count is not known
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        # NB: `entries` and `resolved_entries` are the same list object from here on
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        # A None result of the writers aborts the whole playlist
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            # Appended before any `continue` so that resolved_entries[i] exists below
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params['compat_opts']:
            playlist_index = ie_result['requested_entries'][i]

        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            # NO_DEFAULT marks the entry as rejected; such entries are dropped after the loop
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen(
            f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
            f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')

        entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        }, extra))
        # A falsy result counts as a failed entry
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry through process_ie_result, with extraction errors handled by the decorator"""
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed
2102 def _build_format_filter(self, filter_spec):
2103 " Returns a function to filter the formats according to the filter_spec "
2105 OPERATORS = {
2106 '<': operator.lt,
2107 '<=': operator.le,
2108 '>': operator.gt,
2109 '>=': operator.ge,
2110 '=': operator.eq,
2111 '!=': operator.ne,
2113 operator_rex = re.compile(r'''(?x)\s*
2114 (?P<key>[\w.-]+)\s*
2115 (?P<op>{})(?P<none_inclusive>\s*\?)?\s*
2116 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2117 '''.format('|'.join(map(re.escape, OPERATORS.keys()))))
2118 m = operator_rex.fullmatch(filter_spec)
2119 if m:
2120 try:
2121 comparison_value = int(m.group('value'))
2122 except ValueError:
2123 comparison_value = parse_filesize(m.group('value'))
2124 if comparison_value is None:
2125 comparison_value = parse_filesize(m.group('value') + 'B')
2126 if comparison_value is None:
2127 raise ValueError(
2128 'Invalid value {!r} in format specification {!r}'.format(
2129 m.group('value'), filter_spec))
2130 op = OPERATORS[m.group('op')]
2132 if not m:
2133 STR_OPERATORS = {
2134 '=': operator.eq,
2135 '^=': lambda attr, value: attr.startswith(value),
2136 '$=': lambda attr, value: attr.endswith(value),
2137 '*=': lambda attr, value: value in attr,
2138 '~=': lambda attr, value: value.search(attr) is not None,
2140 str_operator_rex = re.compile(r'''(?x)\s*
2141 (?P<key>[a-zA-Z0-9._-]+)\s*
2142 (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
2143 (?P<quote>["'])?
2144 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2145 (?(quote)(?P=quote))\s*
2146 '''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))
2147 m = str_operator_rex.fullmatch(filter_spec)
2148 if m:
2149 if m.group('op') == '~=':
2150 comparison_value = re.compile(m.group('value'))
2151 else:
2152 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2153 str_op = STR_OPERATORS[m.group('op')]
2154 if m.group('negation'):
2155 op = lambda attr, value: not str_op(attr, value)
2156 else:
2157 op = str_op
2159 if not m:
2160 raise SyntaxError(f'Invalid filter specification {filter_spec!r}')
2162 def _filter(f):
2163 actual_value = f.get(m.group('key'))
2164 if actual_value is None:
2165 return m.group('none_inclusive')
2166 return op(actual_value, comparison_value)
2167 return _filter
2169 def _check_formats(self, formats):
2170 for f in formats:
2171 working = f.get('__working')
2172 if working is not None:
2173 if working:
2174 yield f
2175 continue
2176 self.to_screen('[info] Testing format {}'.format(f['format_id']))
2177 path = self.get_output_path('temp')
2178 if not self._ensure_dir_exists(f'{path}/'):
2179 continue
2180 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2181 temp_file.close()
2182 try:
2183 success, _ = self.dl(temp_file.name, f, test=True)
2184 except (DownloadError, OSError, ValueError, *network_exceptions):
2185 success = False
2186 finally:
2187 if os.path.exists(temp_file.name):
2188 try:
2189 os.remove(temp_file.name)
2190 except OSError:
2191 self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
2192 f['__working'] = success
2193 if success:
2194 yield f
2195 else:
2196 self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
2198 def _select_formats(self, formats, selector):
2199 return list(selector({
2200 'formats': formats,
2201 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2202 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2203 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2206 def _default_format_spec(self, info_dict):
2207 prefer_best = (
2208 self.params['outtmpl']['default'] == '-'
2209 or (info_dict.get('is_live') and not self.params.get('live_from_start')))
2211 def can_merge():
2212 merger = FFmpegMergerPP(self)
2213 return merger.available and merger.can_merge()
2215 if not prefer_best and not can_merge():
2216 prefer_best = True
2217 formats = self._get_formats(info_dict)
2218 evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
2219 if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
2220 self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
2221 'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
2223 compat = (self.params.get('allow_multiple_audio_streams')
2224 or 'format-spec' in self.params['compat_opts'])
2226 return ('best/bestvideo+bestaudio' if prefer_best
2227 else 'bestvideo+bestaudio/best' if compat
2228 else 'bestvideo*+bestaudio/best')
2230 def build_format_selector(self, format_spec):
2231 def syntax_error(note, start):
2232 message = (
2233 'Invalid format specification: '
2234 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2235 return SyntaxError(message)
2237 PICKFIRST = 'PICKFIRST'
2238 MERGE = 'MERGE'
2239 SINGLE = 'SINGLE'
2240 GROUP = 'GROUP'
2241 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2243 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2244 'video': self.params.get('allow_multiple_video_streams', False)}
2246 def _parse_filter(tokens):
2247 filter_parts = []
2248 for type_, string_, _start, _, _ in tokens:
2249 if type_ == tokenize.OP and string_ == ']':
2250 return ''.join(filter_parts)
2251 else:
2252 filter_parts.append(string_)
2254 def _remove_unused_ops(tokens):
2255 # Remove operators that we don't use and join them with the surrounding strings.
2256 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2257 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2258 last_string, last_start, last_end, last_line = None, None, None, None
2259 for type_, string_, start, end, line in tokens:
2260 if type_ == tokenize.OP and string_ == '[':
2261 if last_string:
2262 yield tokenize.NAME, last_string, last_start, last_end, last_line
2263 last_string = None
2264 yield type_, string_, start, end, line
2265 # everything inside brackets will be handled by _parse_filter
2266 for type_, string_, start, end, line in tokens:
2267 yield type_, string_, start, end, line
2268 if type_ == tokenize.OP and string_ == ']':
2269 break
2270 elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
2271 if last_string:
2272 yield tokenize.NAME, last_string, last_start, last_end, last_line
2273 last_string = None
2274 yield type_, string_, start, end, line
2275 elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2276 if not last_string:
2277 last_string = string_
2278 last_start = start
2279 last_end = end
2280 else:
2281 last_string += string_
2282 if last_string:
2283 yield tokenize.NAME, last_string, last_start, last_end, last_line
2285 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2286 selectors = []
2287 current_selector = None
2288 for type_, string_, start, _, _ in tokens:
2289 # ENCODING is only defined in Python 3.x
2290 if type_ == getattr(tokenize, 'ENCODING', None):
2291 continue
2292 elif type_ in [tokenize.NAME, tokenize.NUMBER]:
2293 current_selector = FormatSelector(SINGLE, string_, [])
2294 elif type_ == tokenize.OP:
2295 if string_ == ')':
2296 if not inside_group:
2297 # ')' will be handled by the parentheses group
2298 tokens.restore_last_token()
2299 break
2300 elif inside_merge and string_ in ['/', ',']:
2301 tokens.restore_last_token()
2302 break
2303 elif inside_choice and string_ == ',':
2304 tokens.restore_last_token()
2305 break
2306 elif string_ == ',':
2307 if not current_selector:
2308 raise syntax_error('"," must follow a format selector', start)
2309 selectors.append(current_selector)
2310 current_selector = None
2311 elif string_ == '/':
2312 if not current_selector:
2313 raise syntax_error('"/" must follow a format selector', start)
2314 first_choice = current_selector
2315 second_choice = _parse_format_selection(tokens, inside_choice=True)
2316 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2317 elif string_ == '[':
2318 if not current_selector:
2319 current_selector = FormatSelector(SINGLE, 'best', [])
2320 format_filter = _parse_filter(tokens)
2321 current_selector.filters.append(format_filter)
2322 elif string_ == '(':
2323 if current_selector:
2324 raise syntax_error('Unexpected "("', start)
2325 group = _parse_format_selection(tokens, inside_group=True)
2326 current_selector = FormatSelector(GROUP, group, [])
2327 elif string_ == '+':
2328 if not current_selector:
2329 raise syntax_error('Unexpected "+"', start)
2330 selector_1 = current_selector
2331 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2332 if not selector_2:
2333 raise syntax_error('Expected a selector', start)
2334 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2335 else:
2336 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2337 elif type_ == tokenize.ENDMARKER:
2338 break
2339 if current_selector:
2340 selectors.append(current_selector)
2341 return selectors
def _merge(formats_pair):
    """Combine two selected formats into one merged format dict.

    Each element of `formats_pair` is a format dict. If it already carries
    'requested_formats' (i.e. it is itself the result of a merge), its parts
    are used instead of the dict itself, so merges can be chained.

    When multiple video and/or audio streams are not allowed, image-only
    formats (both codecs 'none') are dropped and only the first format of
    each restricted kind is kept.

    Returns the single remaining format unchanged if only one is left,
    otherwise a new dict describing the combination.
    """
    format_1, format_2 = formats_pair

    formats_info = []
    formats_info.extend(format_1.get('requested_formats', (format_1,)))
    formats_info.extend(format_2.get('requested_formats', (format_2,)))

    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
        get_no_more = {'video': False, 'audio': False}
        # Build a new list instead of popping from `formats_info` while
        # iterating over it: removing during iteration shifts the remaining
        # items and makes the loop skip the entry that follows each removal.
        kept_formats = []
        for fmt_info in formats_info:
            if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                continue
            for aud_vid in ('audio', 'video'):
                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                    if get_no_more[aud_vid]:
                        # A stream of this kind was already accepted
                        break
                    get_no_more[aud_vid] = True
            else:
                kept_formats.append(fmt_info)
        formats_info = kept_formats

    if len(formats_info) == 1:
        return formats_info[0]

    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

    # Stream-specific fields are only copied over when there is exactly one such stream
    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

    output_ext = get_compatible_ext(
        vcodecs=[f.get('vcodec') for f in video_fmts],
        acodecs=[f.get('acodec') for f in audio_fmts],
        vexts=[f['ext'] for f in video_fmts],
        aexts=[f['ext'] for f in audio_fmts],
        preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                     or (self.params.get('prefer_free_formats') and ('webm', 'mkv'))))

    # Truthy values of the given field(s) across all parts
    filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

    new_dict = {
        'requested_formats': formats_info,
        'format': '+'.join(filtered('format')),
        'format_id': '+'.join(filtered('format_id')),
        'ext': output_ext,
        'protocol': '+'.join(map(determine_protocol, formats_info)),
        'language': '+'.join(orderedSet(filtered('language'))) or None,
        'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
        'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
        'tbr': sum(filtered('tbr', 'vbr', 'abr')),
    }

    if the_only_video:
        new_dict.update({
            'width': the_only_video.get('width'),
            'height': the_only_video.get('height'),
            'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
            'fps': the_only_video.get('fps'),
            'dynamic_range': the_only_video.get('dynamic_range'),
            'vcodec': the_only_video.get('vcodec'),
            'vbr': the_only_video.get('vbr'),
            'stretched_ratio': the_only_video.get('stretched_ratio'),
            'aspect_ratio': the_only_video.get('aspect_ratio'),
        })

    if the_only_audio:
        new_dict.update({
            'acodec': the_only_audio.get('acodec'),
            'abr': the_only_audio.get('abr'),
            'asr': the_only_audio.get('asr'),
            'audio_channels': the_only_audio.get('audio_channels'),
        })

    return new_dict
def _check_formats(formats):
    """Lazily yield the usable formats out of `formats`.

    With check_formats == 'selected' every format is tested through
    self._check_formats. With any other explicit check_formats value, or
    when unplayable formats are allowed, the formats are passed through
    untouched. Otherwise only formats flagged 'has_drm' or '__needs_testing'
    are tested.
    """
    check_mode = self.params.get('check_formats')
    if check_mode == 'selected':
        yield from self._check_formats(formats)
    elif check_mode is not None or self.params.get('allow_unplayable_formats'):
        yield from formats
    else:
        for candidate in formats:
            must_test = candidate.get('has_drm') or candidate.get('__needs_testing')
            if must_test:
                yield from self._check_formats([candidate])
            else:
                yield candidate
def _build_selector_function(selector):
    """Turn a parsed selector (or list of selectors) into a callable.

    The returned callable takes a `ctx` dict (it reads 'formats' and, for
    atoms, 'incomplete_formats' and 'has_merged_format') and returns an
    iterable of the chosen format dicts.
    """
    if isinstance(selector, list):  # ,
        fs = [_build_selector_function(s) for s in selector]

        def selector_function(ctx):
            # Comma-separated selectors: results of all of them, in order
            for f in fs:
                yield from f(ctx)
        return selector_function

    elif selector.type == GROUP:  # ()
        selector_function = _build_selector_function(selector.selector)

    elif selector.type == PICKFIRST:  # /
        fs = [_build_selector_function(s) for s in selector.selector]

        def selector_function(ctx):
            # First alternative that yields anything wins
            for f in fs:
                picked_formats = list(f(ctx))
                if picked_formats:
                    return picked_formats
            return []

    elif selector.type == MERGE:  # +
        selector_1, selector_2 = map(_build_selector_function, selector.selector)

        def selector_function(ctx):
            # Every combination of a result from each side is merged
            for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                yield _merge(pair)

    elif selector.type == SINGLE:  # atom
        format_spec = selector.selector or 'best'

        # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
        if format_spec == 'all':
            def selector_function(ctx):
                yield from _check_formats(ctx['formats'][::-1])
        elif format_spec == 'mergeall':
            def selector_function(ctx):
                # Image-only formats (both codecs 'none') are left out of the merge
                formats = list(_check_formats(
                    f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                if not formats:
                    return
                merged_format = formats[-1]
                for f in formats[-2::-1]:
                    merged_format = _merge((merged_format, f))
                yield merged_format

        else:
            format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
            # best/worst [video|audio] [*] [.N]
            mobj = re.match(
                r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                format_spec)
            if mobj is not None:
                format_idx = int_or_none(mobj.group('n'), default=1)
                format_reverse = mobj.group('bw')[0] == 'b'
                # First letter of the stream type ('v'/'a'), or None if not given
                format_type = (mobj.group('type') or [None])[0]
                not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                format_modified = mobj.group('mod') is not None

                format_fallback = not format_type and not format_modified  # for b, w
                _filter_f = (
                    (lambda f: f.get(f'{format_type}codec') != 'none')
                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                    else (lambda f: f.get(f'{not_format_type}codec') == 'none')
                    if format_type  # bv, ba, wv, wa
                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                    if not format_modified  # b, w
                    else lambda f: True)  # b*, w*
                # In every case, formats with neither audio nor video are excluded
                filter_f = lambda f: _filter_f(f) and (
                    f.get('vcodec') != 'none' or f.get('acodec') != 'none')
            else:
                # Not a best/worst expression: an extension or a format id
                if format_spec in self._format_selection_exts['audio']:
                    filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                elif format_spec in self._format_selection_exts['video']:
                    filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                elif format_spec in self._format_selection_exts['storyboards']:
                    filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                else:
                    filter_f = lambda f: f.get('format_id') == format_spec  # id

            def selector_function(ctx):
                formats = list(ctx['formats'])
                matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                if not matches:
                    if format_fallback and ctx['incomplete_formats']:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
                    elif seperate_fallback and not ctx['has_merged_format']:
                        # for compatibility with youtube-dl when there is no pre-merged format
                        matches = list(filter(seperate_fallback, formats))
                # Reversed for "best" so that index 0 is the last entry of ctx['formats']
                matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                try:
                    yield matches[format_idx - 1]
                except LazyList.IndexError:
                    return

    filters = [self._build_format_filter(f) for f in selector.filters]

    def final_selector(ctx):
        # Apply the selector's [filters] on a copy so the caller's ctx is untouched
        ctx_copy = dict(ctx)
        for _filter in filters:
            ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
        return selector_function(ctx_copy)
    return final_selector
2540 # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
2541 # Prefix numbers with random letters to avoid it being classified as a number
2542 # See: https://github.com/yt-dlp/yt-dlp/pulls/8797
2543 # TODO: Implement parser not reliant on tokenize.tokenize
2544 prefix = ''.join(random.choices(string.ascii_letters, k=32))
2545 stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
2546 try:
2547 tokens = list(_remove_unused_ops(
2548 token._replace(string=token.string.replace(prefix, ''))
2549 for token in tokenize.tokenize(stream.readline)))
2550 except tokenize.TokenError:
2551 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
class TokenIterator:
    """Iterator over a token sequence that can step back by one token."""

    def __init__(self, tokens):
        self.tokens = tokens
        self.counter = 0  # index of the token returned by the next call

    def __iter__(self):
        return self

    def __next__(self):
        position = self.counter
        if position >= len(self.tokens):
            raise StopIteration
        token = self.tokens[position]
        self.counter = position + 1
        return token

    next = __next__

    def restore_last_token(self):
        """Rewind by one so the previously returned token is produced again."""
        self.counter -= 1
2573 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2574 return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict, load_cookies=False):
    """Build the HTTP headers for `info_dict` and refresh its 'cookies' field.

    The global 'http_headers' param is combined with the info dict's own
    headers. Any `Cookie` header is dropped from the result; cookies that
    the cookiejar has for info_dict['url'] are serialized into
    info_dict['cookies'] instead. Returns the header dict.
    """
    headers = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
    clean_headers(headers)

    if load_cookies:  # For --load-info-json
        self._load_cookies(headers.get('Cookie'), autoscope=info_dict['url'])  # compat
        self._load_cookies(info_dict.get('cookies'), autoscope=False)
    # The `Cookie` header is removed to prevent leaks and unscoped cookies.
    # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
    headers.pop('Cookie', None)

    url_cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
    if url_cookies:
        encoder = LenientSimpleCookie()
        parts = []
        for cookie in url_cookies:
            _, encoded_value = encoder.value_encode(cookie.value)
            parts.append(f'{cookie.name}={encoded_value}')
            for label, attr in (('Domain', cookie.domain), ('Path', cookie.path)):
                if attr:
                    parts.append(f'{label}={attr}')
            if cookie.secure:
                parts.append('Secure')
            for label, attr in (('Expires', cookie.expires), ('Version', cookie.version)):
                if attr:
                    parts.append(f'{label}={attr}')
        info_dict['cookies'] = '; '.join(parts)

    # An explicitly given X-Forwarded-For header takes precedence
    if 'X-Forwarded-For' not in headers and (forwarded_ip := info_dict.get('__x_forwarded_for_ip')):
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
def _calc_cookies(self, url):
    """Deprecated: return the cookiejar's cookie header for `url`."""
    notice = '"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version'
    self.deprecation_warning(notice)
    cookie_header = self.cookiejar.get_cookie_header(url)
    return cookie_header
def _sort_thumbnails(self, thumbnails):
    """Sort `thumbnails` in place, ascending by preference, width, height, id and url.

    Missing (None) preference/width/height count as -1 and a missing id as ''.
    """
    def value_or(thumb, field, fallback):
        value = thumb.get(field)
        return fallback if value is None else value

    def sort_key(thumb):
        return (
            value_or(thumb, 'preference', -1),
            value_or(thumb, 'width', -1),
            value_or(thumb, 'height', -1),
            value_or(thumb, 'id', ''),
            thumb.get('url'),
        )

    thumbnails.sort(key=sort_key)
def _sanitize_thumbnails(self, info_dict):
    """Normalize info_dict['thumbnails'] in place.

    A lone 'thumbnail' URL is promoted to a one-element 'thumbnails' list.
    Thumbnails are then sorted, given an id (their index) if they have none,
    a 'resolution' when both width and height are set, and a sanitized URL.
    If check_formats is True, the list is replaced by a lazy list that only
    contains thumbnails whose URL answers a HEAD request.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None and (single_url := info_dict.get('thumbnail')):
        thumbnails = info_dict['thumbnails'] = [{'url': single_url}]
    if not thumbnails:
        return

    def reachable_thumbnails(candidates):
        for thumb in candidates:
            self.to_screen(f'[info] Testing thumbnail {thumb["id"]}')
            try:
                self.urlopen(HEADRequest(thumb['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {thumb["id"]} URL {thumb["url"]!r} - {err}. Skipping...')
            else:
                yield thumb

    self._sort_thumbnails(thumbnails)
    for index, thumb in enumerate(thumbnails):
        if thumb.get('id') is None:
            thumb['id'] = str(index)
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is True:
        # Tested in reverse order; LazyList(reverse=True) presents them in the sorted order again
        thumbnails = LazyList(reachable_thumbnails(thumbnails[::-1]), reverse=True)
    info_dict['thumbnails'] = thumbnails
def _fill_common_fields(self, info_dict, final=True):
    """Fill in derived fields of `info_dict` in place.

    Sets 'duration_string', the *_date fields derived from their timestamps,
    'release_year', the live status fields and the old/new variants of the
    deprecated multi-value fields. When `final` is true it additionally sets
    'fulltitle', substitutes a generic title for a missing one and names
    chapter/season/episode after their *_number fields.
    """
    # TODO: move sanitization here
    if final:
        title = info_dict['fulltitle'] = info_dict.get('title')
        if not title:
            # An empty string is only worth a debug message; a missing title is a warning
            if title == '':
                self.write_debug('Extractor gave empty title. Creating a generic title')
            else:
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
            info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

    if info_dict.get('duration') is not None:
        info_dict['duration_string'] = formatSeconds(info_dict['duration'])

    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
            ('modified_timestamp', 'modified_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            with contextlib.suppress(ValueError, OverflowError, OSError):
                upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
                info_dict[date_key] = upload_date.strftime('%Y%m%d')

    if not info_dict.get('release_year'):
        # First four characters of 'release_date' (YYYYMMDD), as an int
        info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        # Only the first key that is not explicitly False is considered
        for key in live_keys:
            if info_dict.get(key) is False:
                continue
            if info_dict.get(key):
                live_status = key
            break
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        # Derive the boolean flags that the extractor did not provide
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)
    if live_status == 'post_live':
        info_dict['was_live'] = True

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number'])

    for old_key, new_key in self._deprecated_multivalue_fields.items():
        if new_key in info_dict and old_key in info_dict:
            if '_version' not in info_dict:  # HACK: Do not warn when using --load-info-json
                self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
        elif old_value := info_dict.get(old_key):
            # Old comma-separated string -> new list field
            info_dict[new_key] = old_value.split(', ')
        elif new_value := info_dict.get(new_key):
            # New list -> old string; commas inside values are replaced so they do not act as separators
            info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
def _raise_pending_errors(self, info):
    """Remove '__pending_error' from `info` and report it if it is truthy."""
    pending = info.pop('__pending_error', None)
    if not pending:
        return
    self.report_error(pending, tb=False)
def sort_formats(self, info_dict):
    """Sort the formats of `info_dict` in place using FormatSorter.

    The sort fields come from info_dict['_format_sort_fields'] (empty if unset).
    """
    formats = self._get_formats(info_dict)
    sort_fields = info_dict.get('_format_sort_fields') or []
    sorter = FormatSorter(self, sort_fields)
    formats.sort(key=sorter.calculate_preference)
def process_video_result(self, info_dict, download=True):
    """Sanitize a single-video result, select its formats and optionally download them.

    `info_dict` is validated and normalized in place (id, numeric fields,
    duration, chapters, thumbnails, subtitles, formats), run through the
    pre-processors and the entry filter, and the requested formats are
    selected. With `download` true, every selected format/time-range
    combination is passed to `process_info`.

    Returns the (possibly replaced) info dict, updated with the last
    selected format.

    Raises ExtractorError if the id is missing or empty, or if no requested
    format is available and 'ignore_no_formats_error' is not set.
    Raises MaxDownloadsReached (after the 'after_video' post-processors ran)
    if it was raised while processing a download.
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    elif not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
            'there is an error in extractor')

    def sanitize_string_field(info, string_field):
        # Coerce a non-None, non-str value to str (with a warning)
        field = info.get(string_field)
        if field is None or isinstance(field, str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = str(field)

    def sanitize_numeric_fields(info):
        # Coerce non-numeric values of the known numeric fields via int_or_none (with a warning)
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, (int, float)):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    if info_dict.get('section_end') and info_dict.get('section_start') is not None:
        info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
    # A duration <= 0 is removed; the warning is only issued if the removed value was non-zero
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    chapters = info_dict.get('chapters') or []
    if chapters and chapters[0].get('start_time'):
        # The first chapter does not start at 0: prepend one that does
        chapters.insert(0, {'start_time': 0})

    # Stands in for the neighbours of the first and the last chapter
    dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
    for idx, (prev, current, next_) in enumerate(zip(
            (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
        if current.get('start_time') is None:
            current['start_time'] = prev.get('end_time')
        if not current.get('end_time'):
            current['end_time'] = next_.get('start_time')
        if not current.get('title'):
            current['title'] = f'<Untitled Chapter {idx}>'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        # Thumbnails were sorted above; use the last one
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    formats = self._get_formats(info_dict)

    # Backward compatibility with InfoExtractor._sort_formats
    field_preference = (formats or [{}])[0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference

    info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
        f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

    if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
        self.report_warning(
            f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
            'only images are available for download. Use --list-formats to see them'.capitalize())

    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        # Live download from the current time: append the current local time to the title
        info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg=(
                '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                'If you want to download from the current time, use --no-live-from-start'))

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats or []))

    if not formats:
        self.raise_no_formats(info_dict)

    for fmt in formats:
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        FormatSorter._fill_sorting_fields(fmt)
        # For these audio extensions a missing acodec defaults to the extension
        if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
            if fmt.get('acodec') is None:
                fmt['acodec'] = fmt['ext']
        if fmt.get('resolution') is None:
            fmt['resolution'] = self.format_resolution(fmt, default=None)
        if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
            fmt['dynamic_range'] = 'SDR'
        if fmt.get('aspect_ratio') is None:
            fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
        # For fragmented formats, "tbr" is often max bitrate and not average
        if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
                and not fmt.get('filesize') and not fmt.get('filesize_approx')):
            fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
        # Keys are looked up in the format first, then in the info dict
        fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)

    # Safeguard against old/insecure infojson when using --load-info-json
    if info_dict.get('http_headers'):
        info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
        info_dict['http_headers'].pop('Cookie', None)

    # This is copied to http_headers by the above _calc_headers and can now be removed
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    self.sort_formats({
        'formats': formats,
        '_format_sort_fields': info_dict.get('_format_sort_fields'),
    })

    # Sanitize and group by format_id
    formats_dict = {}
    for i, fmt in enumerate(formats):
        if not fmt.get('format_id'):
            fmt['format_id'] = str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        formats_dict.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, ambiguous_formats in formats_dict.items():
        ambigious_id = len(ambiguous_formats) > 1
        for i, fmt in enumerate(ambiguous_formats):
            if ambigious_id:
                fmt['format_id'] = f'{format_id}-{i}'
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
                fmt['format_id'] = 'f{}'.format(fmt['format_id'])

            if fmt.get('format') is None:
                fmt['format'] = '{id} - {res}{note}'.format(
                    id=fmt['format_id'],
                    res=self.format_resolution(fmt),
                    note=format_field(fmt, 'format_note', ' (%s)'),
                )

    if self.params.get('check_formats') is True:
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    # A non-None result from _match_entry ends processing of this entry here
    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = self._get_formats(info_dict)

    list_only = self.params.get('simulate') == 'list_only'
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict)
        return info_dict

    format_selector = self.format_selector
    # Runs once, unless interactive selection needs to ask again
    while True:
        if interactive_format_selection:
            req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                               + '(Press ENTER for default, or Ctrl+C to quit)'
                               + self._format_screen(': ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format) if req_format else None
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        if format_selector is None:
            req_format = self._default_format_spec(info_dict)
            self.write_debug(f'Default format spec: {req_format}')
            format_selector = self.build_format_selector(req_format)

        formats_to_download = self._select_formats(formats, format_selector)
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available. Use --list-formats for a list of available formats',
                expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    # Without a 'download_ranges' param there is a single empty range, i.e. the whole video
    requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
    best_format, downloaded_formats = formats_to_download[-1], []
    if download:
        if best_format and requested_ranges:
            def to_screen(*msg):
                self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

            to_screen(f'Downloading {len(formats_to_download)} format(s):',
                      (f['format_id'] for f in formats_to_download))
            if requested_ranges != ({}, ):
                to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                          (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
        max_downloads_reached = False

        for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
            new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
            end_time = offset + min(chapter.get('end_time', duration), duration)
            # duration may not be accurate. So allow deviations <1sec
            if end_time == float('inf') or end_time > offset + duration + 1:
                end_time = None
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    'section_end': end_time,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
            downloaded_formats.append(new_info)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                # Remember it and finish the bookkeeping below before re-raising
                max_downloads_reached = True
            self._raise_pending_errors(new_info)
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        # Record in the archive only if some download asked for it and none returned False
        write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = downloaded_formats
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Returns a dict mapping each requested language to the chosen subtitle
    format dict, or None when no subtitles are available or requested.
    Normal subtitles take precedence over automatic captions of the same
    language.
    """
    want_normal = self.params.get('writesubtitles')
    want_automatic = self.params.get('writeautomaticsub')

    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and want_normal:
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and want_automatic:
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    if not available_subs or not (want_normal or want_automatic):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
    else:
        # Default: a single language, preferring English and normal subtitles
        candidates_in_order = itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            (lang for lang in normal_sub_langs if lang.startswith('en')),
            ['en'] if 'en' in all_sub_langs else [],
            (lang for lang in all_sub_langs if lang.startswith('en')),
            normal_sub_langs, all_sub_langs,
        )
        requested_langs = LazyList(candidates_in_order)[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    ext_preference = formats_query.split('/') if formats_query else []

    def pick_format(candidates):
        # Returns (format, True) for the first satisfied preference,
        # or (last format, False) when none of the preferences matched
        for wanted_ext in ext_preference:
            if wanted_ext == 'best':
                return candidates[-1], True
            same_ext = [sub for sub in candidates if sub['ext'] == wanted_ext]
            if same_ext:
                return same_ext[-1], True
        return candidates[-1], False

    subs = {}
    for lang in requested_langs:
        candidates = available_subs.get(lang)
        if candidates is None:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        chosen, matched = pick_format(candidates)
        if not matched:
            self.report_warning(
                'No subtitle format found matching "{}" for language {}, '
                'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
def _forceprint(self, key, info_dict):
    """Print and/or write to file the templates registered under `key`.

    Works on a shallow copy of `info_dict` extended with 'filename', 'urls'
    and the rendered formats/thumbnails/subtitles tables, and returns that
    copy. Returns None when `info_dict` is None.
    """
    if info_dict is None:
        return
    info_copy = info_dict.copy()
    info_copy.setdefault('filename', self.prepare_filename(info_dict))

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_copy['urls'] = '\n'.join(
            part['url'] + part.get('play_path', '') for part in info_dict['requested_formats'])
    elif info_dict.get('url'):
        info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    video_id = info_dict.get('id')
    info_copy['formats_table'] = self.render_formats_table(info_dict)
    info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
    info_copy['subtitles_table'] = self.render_subtitles_table(video_id, info_dict.get('subtitles'))
    info_copy['automatic_captions_table'] = self.render_subtitles_table(video_id, info_dict.get('automatic_captions'))

    def format_tmpl(tmpl):
        # Expand the shorthand forms (plain field lists, "{...}" dicts and a
        # trailing "=") into output templates; anything else is returned as is
        mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
        if mobj is None:
            return tmpl

        if tmpl.startswith('{'):
            tmpl, fmt = f'.{tmpl}', '%({})j'
        else:
            fmt = '%({})s'
        if tmpl.endswith('='):
            tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
        fields = [tmpl] if mobj.group('dict') else tmpl.split(',')
        return '\n'.join(fmt.format(field) for field in fields)

    for print_tmpl in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(format_tmpl(print_tmpl), info_copy))

    for content_tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
        filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
        content_tmpl = format_tmpl(content_tmpl)
        self.to_screen(f'[info] Writing {content_tmpl!r} to: {filename}')
        if not self._ensure_dir_exists(filename):
            continue
        with open(filename, 'a', encoding='utf-8', newline='') as f:
            f.write(self.evaluate_outtmpl(content_tmpl, info_copy) + os.linesep)

    return info_copy
def __forced_printings(self, info_dict, filename=None, incomplete=True):
    """Emit the output requested by the force* params for a video.

    Runs the 'video' forceprint templates, then prints the individual
    fields enabled through 'forcetitle', 'forceid', 'forceurl', etc., and
    finally the JSON dump when 'forcejson' is set. A missing field is
    skipped if it is optional or if `incomplete` is true.
    """
    needs_full_info = (
        self.params.get('forcejson')
        or self.params['forceprint'].get('video')
        or self.params['print_to_file'].get('video'))
    if needs_full_info:
        self.post_extract(info_dict)
    if filename:
        info_dict['filename'] = filename
    info_copy = self._forceprint('video', info_dict)

    def wants_field(field, optional):
        if not self.params.get(f'force{field}'):
            return False
        return info_copy.get(field) is not None or not (optional or incomplete)

    # (field controlling the output, key actually printed, optional)
    leading_fields = (
        ('title', 'title', False),
        ('id', 'id', False),
        ('url', 'urls', False),
        ('thumbnail', 'thumbnail', True),
        ('description', 'description', True),
        ('filename', 'filename', False),
    )
    for field, printed_key, optional in leading_fields:
        if wants_field(field, optional):
            self.to_stdout(info_copy[printed_key])

    if self.params.get('forceduration') and info_copy.get('duration') is not None:
        self.to_stdout(formatSeconds(info_copy['duration']))

    if wants_field('format', False):
        self.to_stdout(info_copy['format'])

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
def dl(self, name, info, subtitle=False, test=False):
    """Download `info` to `name` with the downloader suitable for it.

    With `test` set, a reduced parameter set is used instead of self.params
    and no progress hooks are attached. Returns whatever the downloader's
    download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        quiet = self.params.get('quiet') or not verbose
        params = {
            'test': True,
            'quiet': quiet,
            'verbose': verbose,
            'noprogress': quiet,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable(url):
            # Keep the payload of data: URLs out of the debug output
            return url.split(',')[0] + ',<data>' if url.startswith('data:') else url

        targets = info.get('requested_formats', []) or [info]
        urls = '", "'.join(loggable(target['url']) for target in targets)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
def existing_file(self, filepaths, *, default_overwrite=True):
    """Resolve which of `filepaths` already exist on disk.

    When overwriting is disabled ('overwrites' param, falling back to
    `default_overwrite`), the first existing path is returned. Otherwise every
    existing path is reported and deleted, and None is returned.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    for path in found:
        self.report_file_delete(path)
        os.remove(path)
    return None
@_catch_unsafe_extension_error
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Runs the 'video' and 'before_dl' pre-processors, performs the forced
    printings, writes the side files (description, subtitles, thumbnails,
    info json, annotations, internet shortcuts), downloads the requested
    format(s) unless simulating or skipping the download, queues fixup
    post-processors and finally runs the post-processors and post hooks.

    Always returns None; the outcome is communicated through `info_dict`
    (e.g. the '__write_download_archive' key) and through the reporting
    methods. Raises MaxDownloadsReached once the 'max_downloads' limit is hit
    and UnavailableVideoError when the download fails with an OSError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)

    def replace_info_dict(new_info):
        # Update the caller's dict object instead of rebinding the name,
        # so that the in-place contract of process_info holds
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, _ = self.pre_process(info_dict, 'video')
    replace_info_dict(new_info)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(full_filename):
        return
    if not self._ensure_dir_exists(temp_filename):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(annofn):
            return
        if not self.params.get('overwrites', True) and os.path.exists(annofn):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(annofn, 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on a failure that must abort processing
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(linkfn):
            return False
        # Keep an existing shortcut only when overwriting is disabled
        # (same rule as for the annotations file above)
        if not self.params.get('overwrites', True) and os.path.exists(linkfn):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(to_high_limit_path(linkfn), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)

                info_dict['__real_download'] = False
                # NOTE: Copy so that original format dicts are not modified
                info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f{}'.format(f['format_id']), info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f{}'.format(f['format_id']), new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))

        except network_exceptions as err:
            self.report_error(f'unable to download video data: {err}')
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except ContentTooShortError as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # do_fixup is True (queue the fixup PP), 'warn' (only warn) or False (skip)
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
                                 and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts'))
                                 or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None),
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(downloader == 'dashsegments'
                                 and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error(f'Postprocessing: {err}')
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error(f'post hooks: {err}')
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()
def __download_wrapper(self, func):
    """Wrap `func` with the error handling shared by the download entry points.

    The wrapper always returns None. CookieLoadError propagates unchanged,
    UnavailableVideoError is reported, and DownloadCancelled is re-raised
    unless 'break_per_url' is set (in which case the download counter is
    reset). On success the result is dumped as JSON when 'dump_single_json'
    is set.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except CookieLoadError:
            raise
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if not self.params.get('break_per_url'):
                raise
            self._num_downloads = 0
            return

        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output template (no '%' field, not stdout) and 'max_downloads' is not 1.
    Returns self._download_retcode.
    """
    urls = variadic(url_list)  # Passing a single URL is a common mistake
    default_tmpl = self.params['outtmpl']['default']
    if len(urls) > 1 and default_tmpl != '-' and '%' not in default_tmpl:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(default_tmpl)

    for url in urls:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Process the info dict(s) stored as JSON in `info_filename`.

    Each loaded info dict is sanitized and passed to process_ie_result. When
    that fails with DownloadError, EntryNotInPlaylist or ReExtractInfo, the
    download is retried from the entry's 'webpage_url'; the error is
    re-raised if there is none. Returns self._download_retcode.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(lines))
        infos = [
            self.sanitize_info(entry, self.params.get('clean_infojson', True))
            for entry in variadic(loaded)]

    for info in infos:
        try:
            self.__download_wrapper(self.process_ie_result)(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
            if not isinstance(err, EntryNotInPlaylist):
                self.to_stderr('\r')
            fallback_url = info.get('webpage_url')
            if fallback_url is None:
                raise
            self.report_warning(f'The info failed to download: {err}; trying with URL {fallback_url}')
            self.download([fallback_url])
        except ExtractorError as err:
            self.report_error(err)
    return self._download_retcode
3642 @staticmethod
3643 def sanitize_info(info_dict, remove_private_keys=False):
3644 """ Sanitize the infodict for converting to json """
3645 if info_dict is None:
3646 return info_dict
3647 info_dict.setdefault('epoch', int(time.time()))
3648 info_dict.setdefault('_type', 'video')
3649 info_dict.setdefault('_version', {
3650 'version': __version__,
3651 'current_git_head': current_git_head(),
3652 'release_git_head': RELEASE_GIT_HEAD,
3653 'repository': ORIGIN,
3656 if remove_private_keys:
3657 reject = lambda k, v: v is None or k.startswith('__') or k in {
3658 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3659 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3660 'playlist_autonumber',
3662 else:
3663 reject = lambda k, v: False
3665 def filter_fn(obj):
3666 if isinstance(obj, dict):
3667 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3668 elif isinstance(obj, (list, tuple, set, LazyList)):
3669 return list(map(filter_fn, obj))
3670 elif obj is None or isinstance(obj, (str, int, float, bool)):
3671 return obj
3672 else:
3673 return repr(obj)
3675 return filter_fn(info_dict)
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """ Alias of sanitize_info for backward compatibility

    `actually_filter` is forwarded as sanitize_info's `remove_private_keys`.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3682 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3683 for filename in set(filter(None, files_to_delete)):
3684 if msg:
3685 self.to_screen(msg % filename)
3686 try:
3687 os.remove(filename)
3688 except OSError:
3689 self.report_warning(f'Unable to delete file {filename}')
3690 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3691 del info['__files_to_move'][filename]
3693 @staticmethod
3694 def post_extract(info_dict):
3695 def actual_post_extract(info_dict):
3696 if info_dict.get('_type') in ('playlist', 'multi_video'):
3697 for video_dict in info_dict.get('entries', {}):
3698 actual_post_extract(video_dict or {})
3699 return
3701 post_extractor = info_dict.pop('__post_extractor', None) or dict
3702 info_dict.update(post_extractor())
3704 actual_post_extract(info_dict or {})
def run_pp(self, pp, infodict):
    """Run a single post-processor and handle the files it asks to delete.

    Returns the info dict produced by `pp.run` (or the input one when a
    PostProcessingError was reported instead of raised, which happens only
    if 'ignoreerrors' is exactly True). Files returned for deletion are kept
    and registered in '__files_to_move' when 'keepvideo' is set, and deleted
    otherwise.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if files_to_delete:
        if self.params.get('keepvideo', False):
            for path in files_to_delete:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run `additional_pps` followed by the post-processors registered for `key`.

    For every stage except 'video' the print templates of that stage are
    emitted first. Each post-processor receives the info returned by the
    previous one; the final info is returned.
    """
    if key != 'video':
        self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    return functools.reduce(lambda current, pp: self.run_pp(pp, current), pipeline, info)
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the post-processors of stage `key` on a copy of `ie_info`.

    A PostProcessingError is not propagated: it is stored under
    '__pending_error' (unless one is already pending) and reported as a
    non-error. Returns a tuple of the resulting info dict and the value of
    its '__files_to_move' entry, which is removed from the dict.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        info.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)
    moves = info.pop('__files_to_move', None)
    return info, moves
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Stores `filename` as 'filepath', runs the 'post_process' stage (preceded
    by the info's own '__postprocessors'), moves the files, drops the
    '__files_to_move' bookkeeping entry and returns the result of the
    'after_move' stage.
    """
    info.update({'filepath': filename, '__files_to_move': files_to_move or {}})
    processed = self.run_all_pps(
        'post_process', info, additional_pps=info.get('__postprocessors'))
    moved = self.run_pp(MoveFilesAfterDownloadPP(self), processed)
    del moved['__files_to_move']
    return self.run_all_pps('after_move', moved)
3756 def _make_archive_id(self, info_dict):
3757 video_id = info_dict.get('id')
3758 if not video_id:
3759 return
3760 # Future-proof against any change in case
3761 # and backwards compatibility with prior versions
3762 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3763 if extractor is None:
3764 url = str_or_none(info_dict.get('url'))
3765 if not url:
3766 return
3767 # Try to find matching extractor for the URL and take its ie_key
3768 for ie_key, ie in self._ies.items():
3769 if ie.suitable(url):
3770 extractor = ie_key
3771 break
3772 else:
3773 return
3774 return make_archive_id(extractor, video_id)
def in_download_archive(self, info_dict):
    """Return whether the entry's archive id, or one of its '_old_archive_ids', is in the archive."""
    if not self.archive:
        return False

    candidates = [
        self._make_archive_id(info_dict),
        *(info_dict.get('_old_archive_ids') or []),
    ]
    return any(candidate in self.archive for candidate in candidates)
def record_download_archive(self, info_dict):
    """Add the entry's archive id to the in-memory archive and, for a path-like archive, append it to the file.

    Does nothing when the 'download_archive' param is None.
    """
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id

    self.write_debug(f'Adding to archive: {vid_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)
3797 @staticmethod
3798 def format_resolution(format, default='unknown'):
3799 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3800 return 'audio only'
3801 if format.get('resolution') is not None:
3802 return format['resolution']
3803 if format.get('width') and format.get('height'):
3804 return '%dx%d' % (format['width'], format['height'])
3805 elif format.get('height'):
3806 return '{}p'.format(format['height'])
3807 elif format.get('width'):
3808 return '%dx?' % format['width']
3809 return default
3811 def _list_format_headers(self, *headers):
3812 if self.params.get('listformats_table', True) is not False:
3813 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3814 return headers
3816 def _format_note(self, fdict):
3817 res = ''
3818 if fdict.get('ext') in ['f4f', 'f4m']:
3819 res += '(unsupported)'
3820 if fdict.get('language'):
3821 if res:
3822 res += ' '
3823 res += '[{}]'.format(fdict['language'])
3824 if fdict.get('format_note') is not None:
3825 if res:
3826 res += ' '
3827 res += fdict['format_note']
3828 if fdict.get('tbr') is not None:
3829 if res:
3830 res += ', '
3831 res += '%4dk' % fdict['tbr']
3832 if fdict.get('container') is not None:
3833 if res:
3834 res += ', '
3835 res += '{} container'.format(fdict['container'])
3836 if (fdict.get('vcodec') is not None
3837 and fdict.get('vcodec') != 'none'):
3838 if res:
3839 res += ', '
3840 res += fdict['vcodec']
3841 if fdict.get('vbr') is not None:
3842 res += '@'
3843 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3844 res += 'video@'
3845 if fdict.get('vbr') is not None:
3846 res += '%4dk' % fdict['vbr']
3847 if fdict.get('fps') is not None:
3848 if res:
3849 res += ', '
3850 res += '{}fps'.format(fdict['fps'])
3851 if fdict.get('acodec') is not None:
3852 if res:
3853 res += ', '
3854 if fdict['acodec'] == 'none':
3855 res += 'video only'
3856 else:
3857 res += '%-5s' % fdict['acodec']
3858 elif fdict.get('abr') is not None:
3859 if res:
3860 res += ', '
3861 res += 'audio'
3862 if fdict.get('abr') is not None:
3863 res += '@%3dk' % fdict['abr']
3864 if fdict.get('asr') is not None:
3865 res += ' (%5dHz)' % fdict['asr']
3866 if fdict.get('filesize') is not None:
3867 if res:
3868 res += ', '
3869 res += format_bytes(fdict['filesize'])
3870 elif fdict.get('filesize_approx') is not None:
3871 if res:
3872 res += ', '
3873 res += '~' + format_bytes(fdict['filesize_approx'])
3874 return res
3876 def _get_formats(self, info_dict):
3877 if info_dict.get('formats') is None:
3878 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3879 return [info_dict]
3880 return []
3881 return info_dict['formats']
def render_formats_table(self, info_dict):
    """Render the formats of `info_dict` as a table.

    Returns None when there are no formats. A plain four-column table is
    produced when 'listformats_table' is explicitly False; otherwise the
    detailed, styled table is rendered. Formats whose 'preference' is below
    -1000 are left out.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        plain_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], plain_rows, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        if codec != 'none':
            return '.'.join(codec.split('.')[:4])

        # This stream is absent; the label depends on whether the other one is absent too
        other_field = 'acodec' if field == 'vcodec' else 'vcodec'
        if f.get(other_field) == 'none':
            return 'images' if field == 'vcodec' else ''
        label = 'audio only' if field == 'vcodec' else 'video only'
        return self._format_out(label, self.Styles.SUPPRESS)

    def filesize_cell(f):
        # Exact size, else the approximate size, else an estimate from tbr and duration
        return (
            format_field(f, 'filesize', ' \t%s', func=format_bytes)
            or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
            or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
                            self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes))

    def drm_label(f):
        has_drm = f.get('has_drm')
        if has_drm == 'maybe':
            return self._format_out('Maybe DRM', self.Styles.WARNING)
        if has_drm:
            return self._format_out('DRM', self.Styles.BAD_FORMAT)
        return None

    def more_info_cell(f):
        return join_nonempty(
            format_field(f, 'language', '[%s]'),
            join_nonempty(
                self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                drm_label(f),
                format_field(f, 'format_note'),
                format_field(f, 'container', ignore=(None, f.get('ext'))),
                delim=', '),
            delim=' ')

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            filesize_cell(f),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            more_info_cell(f),
        ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
def render_thumbnails_table(self, info_dict):
    """Render the thumbnails of `info_dict` as a table, or return None when it has none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ])
    return render_table(self._list_format_headers('ID', 'Width', 'Height', 'URL'), rows)
def render_subtitles_table(self, video_id, subtitles):
    """Render a language -> subtitle formats mapping as a table.

    Returns None when `subtitles` is empty or None. `video_id` is accepted
    but not used by this method.
    """
    if not subtitles:
        return None

    def build_row(lang, formats):
        pairs = [(fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if it is unknown
            names = names[:1] if names[0] != 'unknown' else []
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [build_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'), rows, hide_empty=True)
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by `func(*args)`, or a notice that `video_id` has no `name`."""
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')
def list_formats(self, info_dict):
    """Print the formats table of `info_dict`, or a notice when it has no formats."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
def list_thumbnails(self, info_dict):
    """Print the thumbnails table of `info_dict`, or a notice when it has no thumbnails."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of `subtitles` for `video_id`, labelled `name`, or a notice when there are none."""
    render_args = (video_id, subtitles)
    self.__list_table(video_id, name, self.render_subtitles_table, *render_args)
def print_debug_header(self):
    """Write the debug header; does nothing unless the 'verbose' param is set.

    Emits, in order: stream encodings, program version line, params (only when
    not running from the CLI), lazy-extractor status, compat options, git HEAD,
    system identifier, external executable versions, optional libraries, proxy
    map, request handlers and loaded plugins / plugin directories.

    If the 'logger' param is set, every line goes to its debug() method;
    otherwise the lines are written with write_string()/self._write_string().
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides,
    )

    def get_encoding(stream):
        # Describe a stream's encoding, annotated with terminal limitations
        ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
        additional_info = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            additional_info.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if additional_info:
            ret = f'{ret} ({",".join(additional_info)})'
        return ret

    # One entry per non-None output stream, except the one keyed 'console'
    encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console'),
    )

    # Choose the sink for all following lines; the encodings line is written
    # first, and without a logger it goes through write_string(encoding=None)
    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    # Version line; when not run from the CLI it is tagged 'API', or
    # 'API:<module>.<qualname>' for subclasses of YoutubeDL
    write_debug(join_nonempty(
        f'{REPOSITORY.rpartition("/")[2]} version',
        _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    # Keep only the names of the features whose value is truthy
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features)))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Executables with a falsy version are left out; 'none' if nothing remains
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug(f'exe versions: {exe_str}')

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
    if os.environ.get('YTDLP_NO_PLUGINS'):
        write_debug('Plugins are forcibly disabled')
        return

    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        # Shown as 'ClassName', or 'ClassName as <key>' when the registry key differs
        display_list = ['{}{}'.format(
            klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in plugins.items()]
        if plugin_type == 'Extractor':
            # Overrides: text after '+' in the last override's IE_NAME, plus the parent class name
            display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, plugins in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')
@functools.cached_property
def proxies(self):
    """Global proxy configuration

    When the 'proxy' param is set, the result is {'all': <proxy>}, with an
    empty string translated to '__noproxy__'. Otherwise the mapping from
    urllib.request.getproxies() is used, with 'https' defaulting to the
    'http' entry when only the latter is present.
    """
    configured = self.params.get('proxy')
    if configured is None:
        mapping = urllib.request.getproxies()
        # compat. Set HTTPS_PROXY to __noproxy__ to revert
        if 'http' in mapping and 'https' not in mapping:
            mapping['https'] = mapping['http']
        return mapping

    if configured == '':
        configured = '__noproxy__'
    return {'all': configured}
@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance

    Loaded via load_cookies() from the 'cookiefile' and 'cookiesfrombrowser'
    params. A CookieLoadError is reported through report_error() (with the
    traceback of its underlying cause) and then re-raised.
    """
    cookie_file = self.params.get('cookiefile')
    browser_spec = self.params.get('cookiesfrombrowser')
    try:
        return load_cookies(cookie_file, browser_spec, self)
    except CookieLoadError as error:
        cause = error.__context__
        # compat: <=py3.9: `traceback.format_exception` has a different signature
        formatted_tb = ''.join(traceback.format_exception(None, cause, cause.__traceback__))
        self.report_error(str(cause), tb=formatted_tb)
        raise
4126 @property
4127 def _opener(self):
4129 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4131 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4132 handler = self._request_director.handlers['Urllib']
4133 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
def _get_available_impersonate_targets(self):
    """Return (target, handler name) pairs for every target supported by an impersonating request handler."""
    # TODO(future): make available as public API
    available = []
    for rh in self._request_director.handlers.values():
        if not isinstance(rh, ImpersonateRequestHandler):
            continue
        available.extend((target, rh.RH_NAME) for target in rh.supported_targets)
    return available
def _impersonate_target_available(self, target):
    """Return True if any impersonating request handler reports `target` as supported."""
    # TODO(future): make available as public API
    for rh in self._request_director.handlers.values():
        if isinstance(rh, ImpersonateRequestHandler) and rh.is_supported_target(target):
            return True
    return False
def urlopen(self, req):
    """ Start an HTTP download

    @param req  A URL string, a Request, or (deprecated) a urllib.request.Request.
    @returns    Whatever the request director's send() returns for the request.
    @raises     RequestError with a more helpful message when no handler supports
                the request for a recognised reason (file:// URL, HTTPS proxy,
                WebSocket, impersonation) or for certain SSL errors; any other
                error from the request director is re-raised unchanged.
    """
    if isinstance(req, str):
        req = Request(req)
    elif isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    url, basic_auth_header = extract_basic_auth(req.url)
    if basic_auth_header:
        req.headers['Authorization'] = basic_auth_header
    req.url = sanitize_url(url)

    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        for ue in e.unsupported_errors:
            # FIXME: This depends on the order of errors.
            if not (ue.handler and ue.msg):
                continue
            if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
            if (
                'unsupported proxy type: "https"' in ue.msg.lower()
                and 'requests' not in self._request_director.handlers
                and 'curl_cffi' not in self._request_director.handlers
            ):
                # Chain the handler error explicitly, like the other branches do
                raise RequestError(
                    'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi',
                    cause=ue) from ue

            elif (
                re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
                and 'websockets' not in self._request_director.handlers
            ):
                raise RequestError(
                    'This request requires WebSocket support. '
                    'Ensure one of the following dependencies are installed: websockets',
                    cause=ue) from ue

            elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()):
                # The target may come from the 'impersonate' param instead of the
                # request extensions; don't let a KeyError mask the real error
                target = req.extensions.get('impersonate') or self.params.get('impersonate')
                raise RequestError(
                    f'Impersonate target "{target}" is not available.'
                    f' See --list-impersonate-targets for available targets.'
                    f' This request requires browser impersonation, however you may be missing dependencies'
                    f' required to support this target.', cause=ue) from ue
        raise
    except SSLError as e:
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise
def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector holding one instance of each class in `handlers`.

    Every handler is constructed with the cleaned global headers and proxies,
    the global cookiejar and the networking-related params. `preferences`
    (optional) are added to the director's preferences.
    """
    logger = _YDLLogger(self)
    headers = self.params['http_headers'].copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    # Handler keyword argument -> where to look it up in self.params
    param_paths = {
        'verbose': 'debug_printtraffic',
        'source_address': 'source_address',
        'timeout': 'socket_timeout',
        'legacy_ssl_support': 'legacyserverconnect',
        'enable_file_urls': 'enable_file_urls',
        'impersonate': 'impersonate',
        'client_cert': {
            'client_certificate': 'client_certificate',
            'client_certificate_key': 'client_certificate_key',
            'client_certificate_password': 'client_certificate_password',
        },
    }

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler_cls in handlers:
        instance = handler_cls(
            logger=logger,
            headers=headers,
            cookiejar=self.cookiejar,
            proxies=proxies,
            prefer_system_certs='no-certifi' in self.params['compat_opts'],
            verify=not self.params.get('nocheckcertificate'),
            **traverse_obj(self.params, param_paths),
        )
        director.add_handler(instance)

    director.preferences.update(preferences or [])
    if 'prefer-legacy-http-handler' in self.params['compat_opts']:
        director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
    return director
@functools.cached_property
def _request_director(self):
    """Request director built from the registered request handlers and preferences (cached)."""
    handler_classes = _REQUEST_HANDLERS.values()
    return self.build_request_director(handler_classes, _RH_PREFERENCES)
def encode(self, s):
    """Encode `s` with the encoding from get_encoding(); bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the --encoding option
    appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
4270 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4271 """ Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """
4272 if overwrite is None:
4273 overwrite = self.params.get('overwrites', True)
4274 if not self.params.get('writeinfojson'):
4275 return False
4276 elif not infofn:
4277 self.write_debug(f'Skipping writing {label} infojson')
4278 return False
4279 elif not self._ensure_dir_exists(infofn):
4280 return None
4281 elif not overwrite and os.path.exists(infofn):
4282 self.to_screen(f'[info] {label.title()} metadata is already present')
4283 return 'exists'
4285 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4286 try:
4287 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4288 return True
4289 except OSError:
4290 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4291 return None
4293 def _write_description(self, label, ie_result, descfn):
4294 """ Write description and returns True = written, False = skip, None = error """
4295 if not self.params.get('writedescription'):
4296 return False
4297 elif not descfn:
4298 self.write_debug(f'Skipping writing {label} description')
4299 return False
4300 elif not self._ensure_dir_exists(descfn):
4301 return None
4302 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4303 self.to_screen(f'[info] {label.title()} description is already present')
4304 elif ie_result.get('description') is None:
4305 self.to_screen(f'[info] There\'s no {label} description to write')
4306 return False
4307 else:
4308 try:
4309 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4310 with open(descfn, 'w', encoding='utf-8') as descfile:
4311 descfile.write(ie_result['description'])
4312 except OSError:
4313 self.report_error(f'Cannot write {label} description file {descfn}')
4314 return None
4315 return True
def _write_subtitles(self, info_dict, filename):
    """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error"""
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret
    elif not subtitles:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return ret
    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        # sub_filename is derived from `filename` and is where the subtitle is written;
        # sub_filename_final is derived from the 'subtitle' output template
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            # Reuse the file that is already present instead of writing it again
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            # The subtitle content is already available; write it directly
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue
            except OSError:
                # A write failure aborts the whole method, not just this language
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        # No inline data: download the subtitle
        try:
            # Download from a copy so that the http_headers default is not stored in sub_info
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            # Only ignoreerrors=True downgrades the failure to a warning; any other
            # value raises, and a falsy value additionally reports the error first
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                if not self.params.get('ignoreerrors'):
                    self.report_error(msg)
                raise DownloadError(msg)
            self.report_warning(msg)
    return ret
4373 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4374 """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """
4375 write_all = self.params.get('write_all_thumbnails', False)
4376 thumbnails, ret = [], []
4377 if write_all or self.params.get('writethumbnail', False):
4378 thumbnails = info_dict.get('thumbnails') or []
4379 if not thumbnails:
4380 self.to_screen(f'[info] There are no {label} thumbnails to download')
4381 return ret
4382 multiple = write_all and len(thumbnails) > 1
4384 if thumb_filename_base is None:
4385 thumb_filename_base = filename
4386 if thumbnails and not thumb_filename_base:
4387 self.write_debug(f'Skipping writing {label} thumbnail')
4388 return ret
4390 if thumbnails and not self._ensure_dir_exists(filename):
4391 return None
4393 for idx, t in list(enumerate(thumbnails))[::-1]:
4394 thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
4395 if multiple:
4396 thumb_ext = f'{t["id"]}.{thumb_ext}'
4397 thumb_display_id = f'{label} thumbnail {t["id"]}'
4398 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4399 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4401 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4402 if existing_thumb:
4403 self.to_screen('[info] {} is already present'.format((
4404 thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
4405 t['filepath'] = existing_thumb
4406 ret.append((existing_thumb, thumb_filename_final))
4407 else:
4408 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4409 try:
4410 uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
4411 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4412 with open(thumb_filename, 'wb') as thumbf:
4413 shutil.copyfileobj(uf, thumbf)
4414 ret.append((thumb_filename, thumb_filename_final))
4415 t['filepath'] = thumb_filename
4416 except network_exceptions as err:
4417 if isinstance(err, HTTPError) and err.status == 404:
4418 self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4419 else:
4420 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4421 thumbnails.pop(idx)
4422 if ret and not write_all:
4423 break
4424 return ret