27 from .cache
import Cache
28 from .compat
import urllib
# isort: split
29 from .compat
import urllib_req_to_req
30 from .cookies
import CookieLoadError
, LenientSimpleCookie
, load_cookies
31 from .downloader
import FFmpegFD
, get_suitable_downloader
, shorten_protocol_name
32 from .downloader
.rtmp
import rtmpdump_version
33 from .extractor
import gen_extractor_classes
, get_info_extractor
34 from .extractor
.common
import UnsupportedURLIE
35 from .extractor
.openload
import PhantomJSwrapper
36 from .minicurses
import format_text
37 from .networking
import HEADRequest
, Request
, RequestDirector
38 from .networking
.common
import _REQUEST_HANDLERS
, _RH_PREFERENCES
39 from .networking
.exceptions
import (
46 from .networking
.impersonate
import ImpersonateRequestHandler
47 from .plugins
import directories
as plugin_directories
48 from .postprocessor
import _PLUGIN_CLASSES
as plugin_pps
49 from .postprocessor
import (
51 FFmpegFixupDuplicateMoovPP
,
52 FFmpegFixupDurationPP
,
55 FFmpegFixupStretchedPP
,
56 FFmpegFixupTimestampPP
,
59 FFmpegVideoConvertorPP
,
60 MoveFilesAfterDownloadPP
,
63 from .postprocessor
.ffmpeg
import resolve_mapping
as resolve_recode_mapping
66 _get_system_deprecation
,
100 RejectedVideoReached
,
102 UnavailableVideoError
,
118 format_decimal_suffix
,
132 orderedSet_from_options
,
136 remove_terminal_sequences
,
146 supports_terminal_sequences
,
156 windows_enable_vt_mode
,
160 from .utils
._utils
import _UnsafeExtensionError
, _YDLLogger
161 from .utils
.networking
import (
167 from .version
import CHANNEL
, ORIGIN
, RELEASE_GIT_HEAD
, VARIANT
, __version__
173 def _catch_unsafe_extension_error(func
):
174 @functools.wraps(func
)
175 def wrapper(self
, *args
, **kwargs
):
177 return func(self
, *args
, **kwargs
)
178 except _UnsafeExtensionError
as error
:
180 f
'The extracted extension ({error.extension!r}) is unusual '
181 'and will be skipped for safety reasons. '
182 f
'If you believe this is an error{bug_reports_message(",")}')
190 YoutubeDL objects are the ones responsible for downloading the
191 actual video file and writing it to disk if the user has requested
192 it, among some other tasks. In most cases there should be one per
193 program. Since, given a video URL, the downloader doesn't know how to
194 extract all the needed information (a task that InfoExtractors perform), it
195 has to pass the URL to one of them.
197 For this, YoutubeDL objects have a method that allows
198 InfoExtractors to be registered in a given order. When it is passed
199 a URL, the YoutubeDL object hands it to the first InfoExtractor it
200 finds that reports being able to handle it. The InfoExtractor extracts
201 all the information about the video or videos the URL refers to, and
202 YoutubeDL processes the extracted information, possibly using a File
203 Downloader to download the video.
205 YoutubeDL objects accept a lot of parameters. In order not to saturate
206 the object constructor with arguments, it receives a dictionary of
207 options instead. These options are available through the params
208 attribute for the InfoExtractors to use. The YoutubeDL also
209 registers itself as the downloader in charge for the InfoExtractors
210 that are added to it, so this is a "mutual registration".
214 username: Username for authentication purposes.
215 password: Password for authentication purposes.
216 videopassword: Password for accessing a video.
217 ap_mso: Adobe Pass multiple-system operator identifier.
218 ap_username: Multiple-system operator account username.
219 ap_password: Multiple-system operator account password.
220 usenetrc: Use netrc for authentication instead.
221 netrc_location: Location of the netrc file. Defaults to ~/.netrc.
222 netrc_cmd: Use a shell command to get credentials
223 verbose: Print additional info to stdout.
224 quiet: Do not print messages to stdout.
225 no_warnings: Do not print out anything for warnings.
226 forceprint: A dict with keys WHEN mapped to a list of templates to
227 print to stdout. The allowed keys are video or any of the
228 items in utils.POSTPROCESS_WHEN.
229 For compatibility, a single list is also accepted
230 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
231 a list of tuples with (template, filename)
232 forcejson: Force printing info_dict as JSON.
233 dump_single_json: Force printing the info_dict of the whole playlist
234 (or video) as a single JSON line.
235 force_write_download_archive: Force writing download archive regardless
236 of 'skip_download' or 'simulate'.
237 simulate: Do not download the video files. If unset (or None),
238 simulate only if listsubtitles, listformats or list_thumbnails is used
239 format: Video format code. see "FORMAT SELECTION" for more details.
240 You can also pass a function. The function takes 'ctx' as
241 argument and returns the formats to download.
242 See "build_format_selector" for an implementation
243 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
244 ignore_no_formats_error: Ignore "No video formats" error. Useful for
245 extracting metadata even if the video is not actually
246 available for download (experimental)
247 format_sort: A list of fields by which to sort the video formats.
248 See "Sorting Formats" for more details.
249 format_sort_force: Force the given format_sort. see "Sorting Formats"
251 prefer_free_formats: Whether to prefer video formats with free containers
252 over non-free ones of the same quality.
253 allow_multiple_video_streams: Allow multiple video streams to be merged
255 allow_multiple_audio_streams: Allow multiple audio streams to be merged
257 check_formats: Whether to test if the formats are downloadable.
258 Can be True (check all), False (check none),
259 'selected' (check selected formats),
260 or None (check only if requested by extractor)
261 paths: Dictionary of output paths. The allowed keys are 'home',
262 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
263 outtmpl: Dictionary of templates for output names. Allowed keys
264 are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
265 For compatibility with youtube-dl, a single string can also be used
266 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
267 restrictfilenames: Do not allow "&" and spaces in file names
268 trim_file_name: Limit length of filename (extension excluded)
269 windowsfilenames: True: Force filenames to be Windows compatible
270 False: Sanitize filenames only minimally
271 This option has no effect when running on Windows
272 ignoreerrors: Do not stop on download/postprocessing errors.
273 Can be 'only_download' to ignore only download errors.
274 Default is 'only_download' for CLI, but False for API
275 skip_playlist_after_errors: Number of allowed failures until the rest of
276 the playlist is skipped
277 allowed_extractors: List of regexes to match against extractor names that are allowed
278 overwrites: Overwrite all video and metadata files if True,
279 overwrite only non-video files if None
280 and don't overwrite any file if False
281 playlist_items: Specific indices of playlist to download.
282 playlistrandom: Download playlist items in random order.
283 lazy_playlist: Process playlist entries as they are received.
284 matchtitle: Download only matching titles.
285 rejecttitle: Reject downloads for matching titles.
286 logger: Log messages to a logging.Logger instance.
287 logtostderr: Print everything to stderr instead of stdout.
288 consoletitle: Display progress in the console window's titlebar.
289 writedescription: Write the video description to a .description file
290 writeinfojson: Write the video metadata to a .info.json file
291 clean_infojson: Remove internal metadata from the infojson
292 getcomments: Extract video comments. This will not be written to disk
293 unless writeinfojson is also given
294 writeannotations: Write the video annotations to a .annotations.xml file
295 writethumbnail: Write the thumbnail image to a file
296 allow_playlist_files: Whether to write playlists' description, infojson etc
297 also to disk when using the 'write*' options
298 write_all_thumbnails: Write all thumbnail formats to files
299 writelink: Write an internet shortcut file, depending on the
300 current platform (.url/.webloc/.desktop)
301 writeurllink: Write a Windows internet shortcut file (.url)
302 writewebloclink: Write a macOS internet shortcut file (.webloc)
303 writedesktoplink: Write a Linux internet shortcut file (.desktop)
304 writesubtitles: Write the video subtitles to a file
305 writeautomaticsub: Write the automatically generated subtitles to a file
306 listsubtitles: Lists all available subtitles for the video
307 subtitlesformat: The format code for subtitles
308 subtitleslangs: List of languages of the subtitles to download (can be regex).
309 The list may contain "all" to refer to all the available
310 subtitles. The language can be prefixed with a "-" to
311 exclude it from the requested languages, e.g. ['all', '-live_chat']
312 keepvideo: Keep the video file after post-processing
313 daterange: A utils.DateRange object, download only if the upload_date is in the range.
314 skip_download: Skip the actual download of the video file
315 cachedir: Location of the cache files in the filesystem.
316 False to disable filesystem cache.
317 noplaylist: Download single video instead of a playlist if in doubt.
318 age_limit: An integer representing the user's age in years.
319 Unsuitable videos for the given age are skipped.
320 min_views: An integer representing the minimum view count the video
321 must have in order to not be skipped.
322 Videos without view count information are always
323 downloaded. None for no limit.
324 max_views: An integer representing the maximum view count.
325 Videos that are more popular than that are not
327 Videos without view count information are always
328 downloaded. None for no limit.
329 download_archive: A set, or the name of a file where all downloads are recorded.
330 Videos already present in the file are not downloaded again.
331 break_on_existing: Stop the download process after attempting to download a
332 file that is in the archive.
333 break_per_url: Whether break_on_reject and break_on_existing
334 should act on each input URL as opposed to for the entire queue
335 cookiefile: File name or text stream from where cookies should be read and dumped to
336 cookiesfrombrowser: A tuple containing the name of the browser, the profile
337 name/path from where cookies are loaded, the name of the keyring,
338 and the container name, e.g. ('chrome', ) or
339 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
340 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
341 support RFC 5746 secure renegotiation
342 nocheckcertificate: Do not verify SSL certificates
343 client_certificate: Path to client certificate file in PEM format. May include the private key
344 client_certificate_key: Path to private key file for client certificate
345 client_certificate_password: Password for client certificate private key, if encrypted.
346 If not provided and the key is encrypted, yt-dlp will ask interactively
347 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
348 (Only supported by some extractors)
349 enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
350 http_headers: A dictionary of custom headers to be used for all requests
351 proxy: URL of the proxy server to use
352 geo_verification_proxy: URL of the proxy to use for IP address verification
353 on geo-restricted sites.
354 socket_timeout: Time to wait for unresponsive hosts, in seconds
355 bidi_workaround: Work around buggy terminals without bidirectional text
356 support, using fribidi
357 debug_printtraffic:Print out sent and received HTTP traffic
358 default_search: Prepend this string if an input url is not valid.
359 'auto' for elaborate guessing
360 encoding: Use this encoding instead of the system-specified.
361 extract_flat: Whether to resolve and process url_results further
362 * False: Always process. Default for API
363 * True: Never process
364 * 'in_playlist': Do not process inside playlist/multi_video
365 * 'discard': Always process, but don't return the result
366 from inside playlist/multi_video
367 * 'discard_in_playlist': Same as "discard", but only for
368 playlists (not multi_video). Default for CLI
369 wait_for_video: If given, wait for scheduled streams to become available.
370 The value should be a tuple containing the range
371 (min_secs, max_secs) to wait between retries
372 postprocessors: A list of dictionaries, each with an entry
373 * key: The name of the postprocessor. See
374 yt_dlp/postprocessor/__init__.py for a list.
375 * when: When to run the postprocessor. Allowed values are
376 the entries of utils.POSTPROCESS_WHEN
377 Assumed to be 'post_process' if not given
378 progress_hooks: A list of functions that get called on download
379 progress, with a dictionary with the entries
380 * status: One of "downloading", "error", or "finished".
381 Check this first and ignore unknown values.
382 * info_dict: The extracted info_dict
384 If status is one of "downloading" or "finished", the
385 following properties may also be present:
386 * filename: The final filename (always present)
387 * tmpfilename: The filename we're currently writing to
388 * downloaded_bytes: Bytes on disk
389 * total_bytes: Size of the whole file, None if unknown
390 * total_bytes_estimate: Guess of the eventual file size,
392 * elapsed: The number of seconds since download started.
393 * eta: The estimated time in seconds, None if unknown
394 * speed: The download speed in bytes/second, None if
396 * fragment_index: The counter of the currently
397 downloaded video fragment.
398 * fragment_count: The number of fragments (= individual
399 files that will be merged)
401 Progress hooks are guaranteed to be called at least once
402 (with status "finished") if the download is successful.
403 postprocessor_hooks: A list of functions that get called on postprocessing
404 progress, with a dictionary with the entries
405 * status: One of "started", "processing", or "finished".
406 Check this first and ignore unknown values.
407 * postprocessor: Name of the postprocessor
408 * info_dict: The extracted info_dict
410 Postprocessor hooks are guaranteed to be called at least twice
411 (with status "started" and "finished") if the processing is successful.
412 merge_output_format: "/" separated list of extensions to use when merging formats.
413 final_ext: Expected final extension; used to detect when the file was
414 already downloaded and converted
415 fixup: Automatically correct known faults of the file.
417 - "never": do nothing
418 - "warn": only emit a warning
419 - "detect_or_warn": check whether we can do anything
420 about it, warn otherwise (default)
421 source_address: Client-side IP address to bind to.
422 impersonate: Client to impersonate for requests.
423 An ImpersonateTarget (from yt_dlp.networking.impersonate)
424 sleep_interval_requests: Number of seconds to sleep between requests
426 sleep_interval: Number of seconds to sleep before each download when
427 used alone or a lower bound of a range for randomized
428 sleep before each download (minimum possible number
429 of seconds to sleep) when used along with
431 max_sleep_interval:Upper bound of a range for randomized sleep before each
432 download (maximum possible number of seconds to sleep).
433 Must only be used along with sleep_interval.
434 Actual sleep time will be a random float from range
435 [sleep_interval; max_sleep_interval].
436 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
437 listformats: Print an overview of available video formats and exit.
438 list_thumbnails: Print a table of all thumbnails and exit.
439 match_filter: A function that gets called for every video with the signature
440 (info_dict, *, incomplete: bool) -> Optional[str]
441 For backward compatibility with youtube-dl, the signature
442 (info_dict) -> Optional[str] is also allowed.
443 - If it returns a message, the video is ignored.
444 - If it returns None, the video is downloaded.
445 - If it returns utils.NO_DEFAULT, the user is interactively
446 asked whether to download the video.
447 - Raise utils.DownloadCancelled(msg) to abort remaining
448 downloads when a video is rejected.
449 match_filter_func in utils/_utils.py is one example for this.
450 color: A Dictionary with output stream names as keys
451 and their respective color policy as values.
452 Can also just be a single color policy,
453 in which case it applies to all outputs.
454 Valid stream names are 'stdout' and 'stderr'.
455 Valid color policies are one of 'always', 'auto',
456 'no_color', 'never', 'auto-tty' or 'no_color-tty'.
457 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
460 Two-letter ISO 3166-1 alpha-2 country code that will be used for
461 explicit geographic restriction bypassing via faking
462 X-Forwarded-For HTTP header
464 IP range in CIDR notation that will be used similarly to
466 external_downloader: A dictionary of protocol keys and the executable of the
467 external downloader to use for it. The allowed protocols
468 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
469 Set the value to 'native' to use the native downloader
470 compat_opts: Compatibility options. See "Differences in default behavior".
471 The following options do not work when used through the API:
472 filename, abort-on-error, multistreams, no-live-chat,
473 format-sort, no-clean-infojson, no-playlist-metafiles,
474 no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
475 Refer to __init__.py for their implementation
476 progress_template: Dictionary of templates for progress outputs.
477 Allowed keys are 'download', 'postprocess',
478 'download-title' (console title) and 'postprocess-title'.
479 The template is mapped on a dictionary with keys 'progress' and 'info'
480 retry_sleep_functions: Dictionary of functions that take the number of attempts
481 as argument and returns the time to sleep in seconds.
482 Allowed keys are 'http', 'fragment', 'file_access'
483 download_ranges: A callback function that gets called for every video with
484 the signature (info_dict, ydl) -> Iterable[Section].
485 Only the returned sections will be downloaded.
486 Each Section is a dict with the following keys:
487 * start_time: Start time of the section in seconds
488 * end_time: End time of the section in seconds
489 * title: Section title (Optional)
490 * index: Section number (Optional)
491 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
492 noprogress: Do not print the progress bar
493 live_from_start: Whether to download livestream videos from the start
495 The following parameters are not used by YoutubeDL itself, they are used by
496 the downloader (see yt_dlp/downloader/common.py):
497 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
498 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
499 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
500 external_downloader_args, concurrent_fragment_downloads, progress_delta.
502 The following options are used by the post processors:
503 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
504 to the binary or its containing directory.
505 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
506 and a list of additional command-line arguments for the
507 postprocessor/executable. The dict can also have "PP+EXE" keys
508 which are used when the given exe is used by the given PP.
509 Use 'default' as the name for arguments to be passed to all PPs
510 For compatibility with youtube-dl, a single list of args
513 The following options are used by the extractors:
514 extractor_retries: Number of times to retry for known errors (default: 3)
515 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
516 hls_split_discontinuity: Split HLS playlists into different formats at
517 discontinuities such as ad breaks (default: False)
518 extractor_args: A dictionary of arguments to be passed to the extractors.
519 See "EXTRACTOR ARGUMENTS" for details.
520 E.g. {'youtube': {'skip': ['dash', 'hls']}}
521 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
523 The following options are deprecated and may be removed in the future:
525 break_on_reject: Stop the download process when encountering a video that
526 has been filtered out.
527 - `raise DownloadCancelled(msg)` in match_filter instead
528 force_generic_extractor: Force downloader to use the generic extractor
529 - Use allowed_extractors = ['generic', 'default']
530 playliststart: - Use playlist_items
531 Playlist item to start at.
532 playlistend: - Use playlist_items
533 Playlist item to end at.
534 playlistreverse: - Use playlist_items
535 Download playlist items in reverse order.
536 forceurl: - Use forceprint
537 Force printing final URL.
538 forcetitle: - Use forceprint
539 Force printing title.
540 forceid: - Use forceprint
542 forcethumbnail: - Use forceprint
543 Force printing thumbnail URL.
544 forcedescription: - Use forceprint
545 Force printing description.
546 forcefilename: - Use forceprint
547 Force printing final filename.
548 forceduration: - Use forceprint
549 Force printing duration.
550 allsubtitles: - Use subtitleslangs = ['all']
551 Downloads all the subtitles of the video
552 (requires writesubtitles or writeautomaticsub)
553 include_ads: - Doesn't work
555 call_home: - Not implemented
556 Boolean, true if we are allowed to contact the
557 yt-dlp servers for debugging.
558 post_hooks: - Register a custom postprocessor
559 A list of functions that get called as the final step
560 for each video file, after all postprocessors have been
561 called. The filename will be passed as the only argument.
562 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
563 Use the native HLS downloader instead of ffmpeg/avconv
564 if True, otherwise use ffmpeg/avconv if False, otherwise
565 use downloader suggested by extractor if None.
566 prefer_ffmpeg: - avconv support is deprecated
567 If False, use avconv instead of ffmpeg if both are available,
568 otherwise prefer ffmpeg.
569 youtube_include_dash_manifest: - Use extractor_args
570 If True (default), DASH manifests and related
571 data will be downloaded and processed by extractor.
572 You can reduce network I/O by disabling it if you don't
573 care about DASH. (only for youtube)
574 youtube_include_hls_manifest: - Use extractor_args
575 If True (default), HLS manifests and related
576 data will be downloaded and processed by extractor.
577 You can reduce network I/O by disabling it if you don't
578 care about HLS. (only for youtube)
579 no_color: Same as `color='no_color'`
580 no_overwrites: Same as `overwrites=False`
584 'width', 'height', 'asr', 'audio_channels', 'fps',
585 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
586 'timestamp', 'release_timestamp',
587 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
588 'average_rating', 'comment_count', 'age_limit',
589 'start_time', 'end_time',
590 'chapter_number', 'season_number', 'episode_number',
591 'track_number', 'disc_number', 'release_year',
595 # NB: Keep in sync with the docstring of extractor/common.py
596 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
597 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
598 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
599 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
600 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
601 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
602 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
603 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
605 _deprecated_multivalue_fields
= {
606 'album_artist': 'album_artists',
608 'composer': 'composers',
609 'creator': 'creators',
612 _format_selection_exts
= {
613 'audio': set(MEDIA_EXTENSIONS
.common_audio
),
614 'video': {*MEDIA_EXTENSIONS
.common_video
, '3gp'},
615 'storyboards': set(MEDIA_EXTENSIONS
.storyboards
),
618 def __init__(self
, params
=None, auto_init
=True):
619 """Create a YoutubeDL object with the given options.
620 @param auto_init Whether to load the default extractors and print header (if verbose).
621 Set to 'no_verbose_header' to not print the header
627 self
._ies
_instances
= {}
628 self
._pps
= {k
: [] for k
in POSTPROCESS_WHEN
}
629 self
._printed
_messages
= set()
630 self
._first
_webpage
_request
= True
631 self
._post
_hooks
= []
632 self
._progress
_hooks
= []
633 self
._postprocessor
_hooks
= []
634 self
._download
_retcode
= 0
635 self
._num
_downloads
= 0
637 self
._playlist
_level
= 0
638 self
._playlist
_urls
= set()
639 self
.cache
= Cache(self
)
640 self
.__header
_cookies
= []
642 stdout
= sys
.stderr
if self
.params
.get('logtostderr') else sys
.stdout
643 self
._out
_files
= Namespace(
646 screen
=sys
.stderr
if self
.params
.get('quiet') else stdout
,
647 console
=None if os
.name
== 'nt' else next(
648 filter(supports_terminal_sequences
, (sys
.stderr
, sys
.stdout
)), None),
652 windows_enable_vt_mode()
653 except Exception as e
:
654 self
.write_debug(f
'Failed to enable VT mode: {e}')
656 if self
.params
.get('no_color'):
657 if self
.params
.get('color') is not None:
658 self
.params
.setdefault('_warnings', []).append(
659 'Overwriting params from "color" with "no_color"')
660 self
.params
['color'] = 'no_color'
662 term_allow_color
= os
.getenv('TERM', '').lower() != 'dumb'
663 base_no_color
= bool(os
.getenv('NO_COLOR'))
665 def process_color_policy(stream
):
666 stream_name
= {sys
.stdout
: 'stdout', sys
.stderr
: 'stderr'}[stream
]
667 policy
= traverse_obj(self
.params
, ('color', (stream_name
, None), {str}
, any
)) or 'auto'
668 if policy
in ('auto', 'auto-tty', 'no_color-tty'):
669 no_color
= base_no_color
670 if policy
.endswith('tty'):
671 no_color
= policy
.startswith('no_color')
672 if term_allow_color
and supports_terminal_sequences(stream
):
673 return 'no_color' if no_color
else True
675 assert policy
in ('always', 'never', 'no_color'), policy
676 return {'always': True, 'never': False}.get(policy
, policy
)
678 self
._allow
_colors
= Namespace(**{
679 name
: process_color_policy(stream
)
680 for name
, stream
in self
._out
_files
.items_
if name
!= 'console'
683 system_deprecation
= _get_system_deprecation()
684 if system_deprecation
:
685 self
.deprecated_feature(system_deprecation
.replace('\n', '\n '))
687 if self
.params
.get('allow_unplayable_formats'):
689 f
'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
690 'This is a developer option intended for debugging. \n'
691 ' If you experience any issues while using this option, '
692 f
'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
694 if self
.params
.get('bidi_workaround', False):
697 master
, slave
= pty
.openpty()
698 width
= shutil
.get_terminal_size().columns
699 width_args
= [] if width
is None else ['-w', str(width
)]
700 sp_kwargs
= {'stdin': subprocess
.PIPE
, 'stdout': slave
, 'stderr': self
._out
_files
.error
}
702 self
._output
_process
= Popen(['bidiv', *width_args
], **sp_kwargs
)
704 self
._output
_process
= Popen(['fribidi', '-c', 'UTF-8', *width_args
], **sp_kwargs
)
705 self
._output
_channel
= os
.fdopen(master
, 'rb')
706 except OSError as ose
:
707 if ose
.errno
== errno
.ENOENT
:
709 'Could not find fribidi executable, ignoring --bidi-workaround. '
710 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
714 self
.params
['compat_opts'] = set(self
.params
.get('compat_opts', ()))
715 self
.params
['http_headers'] = HTTPHeaderDict(std_headers
, self
.params
.get('http_headers'))
716 self
._load
_cookies
(self
.params
['http_headers'].get('Cookie')) # compat
717 self
.params
['http_headers'].pop('Cookie', None)
719 if auto_init
and auto_init
!= 'no_verbose_header':
720 self
.print_debug_header()
722 def check_deprecated(param
, option
, suggestion
):
723 if self
.params
.get(param
) is not None:
724 self
.report_warning(f
'{option} is deprecated. Use {suggestion} instead')
728 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
729 if self
.params
.get('geo_verification_proxy') is None:
730 self
.params
['geo_verification_proxy'] = self
.params
['cn_verification_proxy']
732 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
733 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
734 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
736 for msg
in self
.params
.get('_warnings', []):
737 self
.report_warning(msg
)
738 for msg
in self
.params
.get('_deprecation_warnings', []):
739 self
.deprecated_feature(msg
)
741 if impersonate_target
:= self
.params
.get('impersonate'):
742 if not self
._impersonate
_target
_available
(impersonate_target
):
743 raise YoutubeDLError(
744 f
'Impersonate target "{impersonate_target}" is not available. '
745 f
'Use --list-impersonate-targets to see available targets. '
746 f
'You may be missing dependencies required to support this target.')
748 if 'list-formats' in self
.params
['compat_opts']:
749 self
.params
['listformats_table'] = False
751 if 'overwrites' not in self
.params
and self
.params
.get('nooverwrites') is not None:
752 # nooverwrites was unnecessarily changed to overwrites
753 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
754 # This ensures compatibility with both keys
755 self
.params
['overwrites'] = not self
.params
['nooverwrites']
756 elif self
.params
.get('overwrites') is None:
757 self
.params
.pop('overwrites', None)
759 self
.params
['nooverwrites'] = not self
.params
['overwrites']
761 if self
.params
.get('simulate') is None and any((
762 self
.params
.get('list_thumbnails'),
763 self
.params
.get('listformats'),
764 self
.params
.get('listsubtitles'),
766 self
.params
['simulate'] = 'list_only'
768 self
.params
.setdefault('forceprint', {})
769 self
.params
.setdefault('print_to_file', {})
771 # Compatibility with older syntax
772 if not isinstance(params
['forceprint'], dict):
773 self
.params
['forceprint'] = {'video': params
['forceprint']}
776 self
.add_default_info_extractors()
778 if (sys
.platform
!= 'win32'
779 and sys
.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
780 and not self
.params
.get('restrictfilenames', False)):
781 # Unicode filesystem API will throw errors (#1474, #13027)
783 'Assuming --restrict-filenames since file system encoding '
784 'cannot encode all characters. '
785 'Set the LC_ALL environment variable to fix this.')
786 self
.params
['restrictfilenames'] = True
788 self
._parse
_outtmpl
()
790 # Creating format selector here allows us to catch syntax errors before the extraction
791 self
.format_selector
= (
792 self
.params
.get('format') if self
.params
.get('format') in (None, '-')
793 else self
.params
['format'] if callable(self
.params
['format'])
794 else self
.build_format_selector(self
.params
['format']))
797 'post_hooks': self
.add_post_hook
,
798 'progress_hooks': self
.add_progress_hook
,
799 'postprocessor_hooks': self
.add_postprocessor_hook
,
801 for opt
, fn
in hooks
.items():
802 for ph
in self
.params
.get(opt
, []):
805 for pp_def_raw
in self
.params
.get('postprocessors', []):
806 pp_def
= dict(pp_def_raw
)
807 when
= pp_def
.pop('when', 'post_process')
808 self
.add_post_processor(
809 get_postprocessor(pp_def
.pop('key'))(self
, **pp_def
),
812 def preload_download_archive(fn
):
813 """Preload the archive, if any is specified"""
817 elif not is_path_like(fn
):
820 self
.write_debug(f
'Loading archive file {fn!r}')
822 with
locked_file(fn
, 'r', encoding
='utf-8') as archive_file
:
823 for line
in archive_file
:
824 archive
.add(line
.strip())
825 except OSError as ioe
:
826 if ioe
.errno
!= errno
.ENOENT
:
830 self
.archive
= preload_download_archive(self
.params
.get('download_archive'))
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short video ID starting with a dash.

    Every argument made of a dash followed by exactly 10 ID characters is
    collected; if any exist, a warning shows the command line rewritten with
    those arguments moved behind a "--" separator.
    """
    # short YouTube ID starting with dash?
    dashed_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    idxs = []
    for pos, arg in enumerate(argv):
        if dashed_id.match(arg):
            idxs.append(pos)
    if not idxs:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in idxs]
    dashed_args = [argv[pos] for pos in idxs]
    correct_argv = ['yt-dlp', *regular_args, '--', *dashed_args]
    self.report_warning(
        'Long argument string detected. '
        f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    `ie` may be an extractor class or an instance. Both are stored in
    `self._ies` under their `ie_key()`; only instances are also stored in
    `self._ies_instances` and bound to this downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    An instance already present in `self._ies_instances` is reused; otherwise
    a new one is created, added through add_info_extractor and returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
def add_default_info_extractors(self):
    """
    Register the extractors selected by the 'allowed_extractors' param
    (['default'] when unset), in the order they are resolved.

    Raises ValueError when resolving the selection raises re.error.
    """
    all_ies = {}
    for ie_cls in gen_extractor_classes():
        all_ies[ie_cls.IE_NAME.lower()] = ie_cls
    all_ies['end'] = UnsupportedURLIE()

    aliases = {
        'all': list(all_ies),
        'default': [name for name, ie in all_ies.items() if ie._ENABLED],
    }
    requested = self.params.get('allowed_extractors', ['default'])
    try:
        ie_names = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for name in ie_names:
        self.add_info_extractor(all_ies[name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')
def add_post_processor(self, pp, when='post_process'):
    """Add a PostProcessor object to the end of the chain.

    @param when    Key of the chain in `self._pps` to append to; must be one
                   of POSTPROCESS_WHEN
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
def add_post_hook(self, ph):
    """Add the post hook"""
    registered = self._post_hooks
    registered.append(ph)
def add_progress_hook(self, ph):
    """Add the download progress hook"""
    registered = self._progress_hooks
    registered.append(ph)
def add_postprocessor_hook(self, ph):
    """Add the postprocessing progress hook"""
    self._postprocessor_hooks.append(ph)
    # Postprocessors that are already registered receive the hook as well
    for stage in self._pps:
        for pp in self._pps[stage]:
            pp.add_progress_hook(ph)
906 def _bidi_workaround(self
, message
):
907 if not hasattr(self
, '_output_channel'):
910 assert hasattr(self
, '_output_process')
911 assert isinstance(message
, str)
912 line_count
= message
.count('\n') + 1
913 self
._output
_process
.stdin
.write((message
+ '\n').encode())
914 self
._output
_process
.stdin
.flush()
915 res
= ''.join(self
._output
_channel
.readline().decode()
916 for _
in range(line_count
))
917 return res
[:-len('\n')]
919 def _write_string(self
, message
, out
=None, only_once
=False):
921 if message
in self
._printed
_messages
:
923 self
._printed
_messages
.add(message
)
924 write_string(message
, out
=out
, encoding
=self
.params
.get('encoding'))
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout

    `quiet` and `skip_eol` are no longer honoured; passing either one only
    triggers a deprecation warning.
    """
    quiet_given = quiet is not None
    skip_eol_given = skip_eol is not False
    if quiet_given:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                 'Use "YoutubeDL.to_screen" instead')
    if skip_eol_given:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                 'Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)
def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to screen if not in quiet mode

    With a 'logger' param the message goes to `logger.debug` instead.
    `quiet` overrides the 'quiet' param when it is not None; the 'verbose'
    param always disables quiet mode.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    be_quiet = self.params.get('quiet') if quiet is None else quiet
    if be_quiet and not self.params.get('verbose'):
        return
    eol = '' if skip_eol else '\n'
    self._write_string(
        '{}{}'.format(self._bidi_workaround(message), eol),
        self._out_files.screen, only_once=only_once)
def to_stderr(self, message, only_once=False):
    """Print message to stderr

    With a 'logger' param the message goes to `logger.error` instead.
    """
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)
955 def _send_console_code(self
, code
):
956 if os
.name
== 'nt' or not self
._out
_files
.console
:
958 self
._write
_string
(code
, self
._out
_files
.console
)
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' param is enabled.

    Terminal sequences are removed from the message first. On 'nt' the title
    is set through kernel32 if a console window exists; elsewhere an OSC 0
    sequence is sent via _send_console_code.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if os.name != 'nt':
        self._send_console_code(f'\033]0;{message}\007')
        return
    if ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
def save_console_title(self):
    """Send the "save title" console code, unless 'consoletitle' is off or 'simulate' is on."""
    params = self.params
    if params.get('consoletitle') and not params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack
def restore_console_title(self):
    """Send the "restore title" console code, unless 'consoletitle' is off or 'simulate' is on."""
    params = self.params
    if params.get('consoletitle') and not params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack
983 self
.save_console_title()
def save_cookies(self):
    """Save the cookie jar when the 'cookiefile' param is set."""
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
990 def __exit__(self
, *args
):
991 self
.restore_console_title()
996 if '_request_director' in self
.__dict
__:
997 self
._request
_director
.close()
998 del self
._request
_director
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr. In verbose mode a traceback is
    printed too: `tb` if given, else one built from the exception currently
    being handled (including the `exc_info` it carries, if any), else the
    current call stack.

    Unless `is_error` is false, the problem then either raises DownloadError
    or, when the 'ignoreerrors' param is set, sets the return code to 1.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            current = sys.exc_info()
            if current[0]:  # if .trouble has been called from an except block
                parts = []
                if hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
                    parts.append(''.join(traceback.format_exception(*current[1].exc_info)))
                parts.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
1036 EMPHASIS
='light blue',
1041 BAD_FORMAT
='light red',
1043 SUPPRESS
='light black',
1046 def _format_text(self
, handle
, allow_colors
, text
, f
, fallback
=None, *, test_encoding
=False):
1049 original_text
= text
1050 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1051 encoding
= self
.params
.get('encoding') or getattr(handle
, 'encoding', None) or 'ascii'
1052 text
= text
.encode(encoding
, 'ignore').decode(encoding
)
1053 if fallback
is not None and text
!= original_text
:
1055 return format_text(text
, f
) if allow_colors
is True else text
if fallback
is None else fallback
1057 def _format_out(self
, *args
, **kwargs
):
1058 return self
._format
_text
(self
._out
_files
.out
, self
._allow
_colors
.out
, *args
, **kwargs
)
1060 def _format_screen(self
, *args
, **kwargs
):
1061 return self
._format
_text
(self
._out
_files
.screen
, self
._allow
_colors
.screen
, *args
, **kwargs
)
1063 def _format_err(self
, *args
, **kwargs
):
1064 return self
._format
_text
(self
._out
_files
.error
, self
._allow
_colors
.error
, *args
, **kwargs
)
def report_warning(self, message, only_once=False):
    """
    Emit message as a warning.

    With a 'logger' param it goes to `logger.warning`. Otherwise it is
    printed to stderr behind a styled 'WARNING:' prefix, unless the
    'no_warnings' param is set.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
def deprecation_warning(self, message, *, stacklevel=0):
    """Pass message to the module-level deprecation_warning helper.

    The stack level is raised by one to account for this wrapper, and
    report_error is used as the printer with is_error=False.
    """
    caller_level = stacklevel + 1
    deprecation_warning(
        message, stacklevel=caller_level, printer=self.report_error, is_error=False)
def deprecated_feature(self, message):
    """Report use of a deprecated feature.

    The message is sent to `logger.warning` when a 'logger' param exists and,
    in every case, to to_stderr (once per distinct message) behind a styled
    'Deprecated Feature:' prefix.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
    self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
    """
    Do the same as trouble, but prefix the message with a styled 'ERROR:'.
    Extra arguments are passed on to trouble unchanged.
    """
    full_message = f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}'
    self.trouble(full_message, *args, **kwargs)
def write_debug(self, message, only_once=False):
    """Log debug message or Print message to stderr

    Nothing happens unless the 'verbose' param is set. The message gets a
    '[debug] ' prefix and goes to `logger.debug` when a 'logger' param
    exists, else to stderr.
    """
    if not self.params.get('verbose', False):
        return
    message = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
    else:
        self.to_stderr(message, only_once)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name on UnicodeEncodeError.
    """
    try:
        detailed = f'[download] {file_name} has already been downloaded'
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
def report_file_delete(self, file_name):
    """Report that existing file will be deleted.

    Falls back to a message without the file name on UnicodeEncodeError.
    """
    try:
        detailed = f'Deleting existing file {file_name}'
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError for a result without formats, or only warn.

    Only a warning is issued when the 'ignore_no_formats_error' param is set
    and `forced` is false. The error is marked expected if the info has
    '_has_drm', the ignore param is set, or a custom `msg` was supplied.

    @param info      Info dict; 'id' and 'extractor' are read when raising
    @param forced    Raise even if the ignore param is set
    @param msg       Message to use instead of the default ones
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)
def parse_outtmpl(self):
    """Deprecated wrapper: warn, run _parse_outtmpl and return the 'outtmpl' param."""
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    templates = self.params['outtmpl']
    return templates
def _parse_outtmpl(self):
    """Normalize the 'outtmpl' param into a dict and fill in missing defaults.

    A non-dict value becomes {'default': value}. Every DEFAULT_OUTTMPL key
    whose entry is missing or None gets the default template; with
    'restrictfilenames' the spaces in those defaults are removed first.
    """
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(template):
            return template.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl
    for key, default_tmpl in DEFAULT_OUTTMPL.items():
        if outtmpl.get(key) is None:
            outtmpl[key] = sanitize(default_tmpl)
def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path for `dir_type` and `filename`, then sanitize the result.

    @param dir_type    Key into the 'paths' param; ignored when empty
    @param filename    Final path component; treated as '' when falsy
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
def _outtmpl_expandpath(outtmpl):
    """Run expand_path on the template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choices(string.ascii_letters, k=32))
    protected = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(sep, '')
def escape_outtmpl(outtmpl):
    """ Escape any remaining strings like %s, %abc% etc. """
    def add_percent(mobj):
        # Matches that carry a key are kept as they are; the others get an extra '%'
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, add_percent, outtmpl)
def validate_outtmpl(cls, outtmpl):
    """ @return None or Exception object """
    # Replace the custom conversion types with plain 's' before test-formatting
    custom_types = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')
    outtmpl = re.sub(
        custom_types,
        lambda mobj: f'{mobj.group(0)[:-1]}s',
        cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1189 def _copy_infodict(info_dict
):
1190 info_dict
= dict(info_dict
)
1191 info_dict
.pop('__postprocessors', None)
1192 info_dict
.pop('__pending_error', None)
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename
    @return            Tuple of the rewritten template and the dict of
                       values to substitute into it
    """

    # Written to the caller's dict, before the working copy is taken
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Values computed for each template field, keyed by the rewritten key (see create_key)
    TMPL_DICT = {}
    # Matches the %(...)X fields of the template, including the custom conversion types
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
        '*': float.__mul__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}  # noqa: UP031
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {  # noqa: UP031
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
    # Parses the text inside %(...): [-]fields[maths][>strf_format][,alternate][&replacement][|default]
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _from_user_input(field):
        # Convert one path component of the template into a traversal key:
        # ':' -> Ellipsis, 'a:b[:c]' -> slice, digits -> int, anything else stays a string
        if field == ':':
            return ...
        elif ':' in field:
            return slice(*map(int_or_none, field.split(':')))
        elif int_or_none(field) is not None:
            return int(field)
        return field

    def _traverse_infodict(fields):
        # Split on '.', but keep each "{...}" group as a single component
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty first and/or last component
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if not f.startswith('{'):
                fields[i] = _from_user_input(f)
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            # "{a,b.c}" becomes a dict mapping each listed field to its own path
            fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Consume the expression alternately as operator, operand, operator, ...
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a number literal: the operand names another field
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    # Placeholder used when a field has no value and no explicit default
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted):
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # Resolve `sanitize` into either a falsy value or a callable taking (key, value)
    if callable(sanitize):
        self.deprecation_warning('Passing a callable "sanitize" to YoutubeDL.prepare_outtmpl is deprecated')
    elif not sanitize:
        pass
    elif (sys.platform != 'win32' and not self.params.get('restrictfilenames')
            and self.params.get('windowsfilenames') is False):
        def sanitize(key, value):
            return value.replace('/', '\u29F8').replace('\0', '')
    else:
        def sanitize(key, value):
            return filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames'))

    def _dumpjson_default(obj):
        # json.dumps fallback: sets and LazyLists become lists, everything else its repr
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Only purely numeric field names are accepted in replacement strings
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return super().get_field(field_name, args, kwargs)
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        # re.sub callback: compute the value for one template field, store it in
        # TMPL_DICT and return the rewritten field text
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        value, replacement, default, last_field = None, None, na, ''
        # Try the field, then each ",alternate" in turn until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            last_field, replacement = mobj['fields'], mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                value, default = None, na

        fmt = outer_mobj.group('format')
        if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = shell_quote(value, shell=True), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we manually convert it before sanitizing
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitize(last_field, value)

        # The new key embeds NUL characters and the original format, so the same
        # field used with different formats gets separate TMPL_DICT entries
        key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Substitute info_dict into outtmpl and return the resulting string.

    Extra arguments are forwarded to prepare_outtmpl.
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(prepared_tmpl)
    return escaped % values
@_catch_unsafe_extension_error
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
    """Evaluate an output template into a filename.

    @param outtmpl      Template to use; mutually exclusive with tmpl_type
    @param tmpl_type    Key of the template in the 'outtmpl' param
                        ('default' when empty or None)
    @return             The filename, or None when it evaluates to nothing or
                        the template raises ValueError (which is reported)
    """
    assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
    if outtmpl is None:
        outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
    try:
        expanded = self._outtmpl_expandpath(outtmpl)
        filename = self.evaluate_outtmpl(expanded, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('', 'temp'):
            final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
            if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)
        elif tmpl_type:
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Only the part before the last two extensions is trimmed
            stem, *exts = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:trim_file_name], *exts, delim='.')
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
    return filename
def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param dir_type    Template/path type; must be empty when outtmpl is given
    @param outtmpl     Explicit template to use instead of the configured one
    @param warn        Warn when the 'paths' param is set but the filename is
                       '-' or an absolute path
    @return            '' if nothing was generated for a type other than ''
                       and 'temp'; the raw filename if it is '-' or empty;
                       otherwise the result of get_output_path
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)

    if filename == '-' or not filename:
        return filename
    return self.get_output_path(dir_type, filename)
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded

    Otherwise returns the reason (a string) for skipping the entry. The
    reason is also printed to screen unless `silent`. When the param that
    belongs to the kind of rejection is set ('break_on_existing',
    'break_on_reject' or 'match_filter'), the matching exception is raised
    instead of returning.

    @param incomplete    Passed on to the 'match_filter' callable; must be
                         true for anything that is not a video result
    @param silent        Do not print the skip reason
    """
    _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        # Playlists are never filtered; url results only when the extractor
        # reports that the URL is a single video
        if _type in ('playlist', 'multi_video'):
            return
        elif _type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            return

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return f'Skipping "{video_title}" because it is age restricted'

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        cancelled = None
        try:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                ret = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            # A cancellation whose message is NO_DEFAULT is treated like a
            # NO_DEFAULT result, but remembered so that declining the prompt
            # below re-raises the same exception type
            if err.msg is not NO_DEFAULT:
                raise
            ret, cancelled = err.msg, err

        if ret is NO_DEFAULT:
            # Ask interactively; any reply other than y/<empty>/n repeats the prompt
            while True:
                filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                reply = input(self._format_screen(
                    f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                if reply in {'y', ''}:
                    return None
                elif reply == 'n':
                    if cancelled:
                        raise type(cancelled)(f'Skipping {video_title}')
                    return f'Skipping {video_title}'
        return ret

    # The download archive is checked first; the filters only run for entries not in it
    if self.in_download_archive(info_dict):
        reason = ''.join((
            format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
            format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
            'has already been recorded in the archive'))
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = check_filter()
        except DownloadCancelled as e:
            reason, break_opt, break_err = e.msg, 'match_filter', type(e)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
def add_extra_info(info_dict, extra_info):
    """Set the keys from extra_info in info dict if they are missing"""
    for key in extra_info:
        info_dict.setdefault(key, extra_info[key])
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    The first suitable extractor handles the URL. Nothing is returned when
    the URL's temporary ID is already in the download archive
    (ExistingVideoReached is raised instead if 'break_on_existing' is set) or
    when no extractor is suitable, which is reported as an error.

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if not ie_key:
        ies = self._ies
    elif ie_key in self._ies:
        ies = {ie_key: self._ies[ie_key]}
    else:
        ies = {}

    for key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                           'has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached
            return None
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

    # Reached only when no extractor accepted the URL
    extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
    self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                      tb=False if extractors_restricted else None)
1629 def _handle_extraction_exceptions(func
):
1630 @functools.wraps(func
)
1631 def wrapper(self
, *args
, **kwargs
):
1634 return func(self
, *args
, **kwargs
)
1635 except (CookieLoadError
, DownloadCancelled
, LazyList
.IndexError, PagedList
.IndexError):
1637 except ReExtractInfo
as e
:
1639 self
.to_screen(f
'{e}; Re-extracting data')
1641 self
.to_stderr('\r')
1642 self
.report_warning(f
'{e}; Re-extracting data')
1644 except GeoRestrictedError
as e
:
1647 msg
+= '\nThis video is available in {}.'.format(', '.join(
1648 map(ISO3166Utils
.short2full
, e
.countries
)))
1649 msg
+= '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1650 self
.report_error(msg
)
1651 except ExtractorError
as e
: # An error we somewhat expected
1652 self
.report_error(str(e
), e
.format_traceback())
1653 except Exception as e
:
1654 if self
.params
.get('ignoreerrors'):
1655 self
.report_error(str(e
), tb
=encode_compat_str(traceback
.format_exc()))
1661 def _wait_for_video(self
, ie_result
={}):
1662 if (not self
.params
.get('wait_for_video')
1663 or ie_result
.get('_type', 'video') != 'video'
1664 or ie_result
.get('formats') or ie_result
.get('url')):
1667 format_dur
= lambda dur
: '%02d:%02d:%02d' % timetuple_from_msec(dur
* 1000)[:-1]
1672 full_msg
= f
'{msg}\n'
1673 if not self
.params
.get('noprogress'):
1674 full_msg
= msg
+ ' ' * (len(last_msg
) - len(msg
)) + '\r'
1677 self
.to_screen(full_msg
, skip_eol
=True)
1680 min_wait
, max_wait
= self
.params
.get('wait_for_video')
1681 diff
= try_get(ie_result
, lambda x
: x
['release_timestamp'] - time
.time())
1682 if diff
is None and ie_result
.get('live_status') == 'is_upcoming':
1683 diff
= round(random
.uniform(min_wait
, max_wait
) if (max_wait
and min_wait
) else (max_wait
or min_wait
), 0)
1684 self
.report_warning('Release time of video is not known')
1685 elif ie_result
and (diff
or 0) <= 0:
1686 self
.report_warning('Video should already be available according to extracted info')
1687 diff
= min(max(diff
or 0, min_wait
or 0), max_wait
or float('inf'))
1688 self
.to_screen(f
'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1690 wait_till
= time
.time() + diff
1693 diff
= wait_till
- time
.time()
1696 raise ReExtractInfo('[wait] Wait period ended', expected
=True)
1697 progress(f
'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1699 except KeyboardInterrupt:
1701 raise ReExtractInfo('[wait] Interrupted by user', expected
=True)
1702 except BaseException
as e
:
1703 if not isinstance(e
, ReExtractInfo
):
1707 def _load_cookies(self
, data
, *, autoscope
=True):
1708 """Loads cookies from a `Cookie` header
1710 This tries to work around the security vulnerability of passing cookies to every domain.
1711 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1713 @param data The Cookie header as string to load the cookies from
1714 @param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookies without domains
1715 If `True`, save cookies for later to be stored in the jar with a limited scope
1716 If a URL, save cookies in the jar with the domain of the URL
1718 for cookie
in LenientSimpleCookie(data
).values():
1719 if autoscope
and any(cookie
.values()):
1720 raise ValueError('Invalid syntax in Cookie Header')
1722 domain
= cookie
.get('domain') or ''
1723 expiry
= cookie
.get('expires')
1724 if expiry
== '': # 0 is valid
1726 prepared_cookie
= http
.cookiejar
.Cookie(
1727 cookie
.get('version') or 0, cookie
.key
, cookie
.value
, None, False,
1728 domain
, True, True, cookie
.get('path') or '', bool(cookie
.get('path')),
1729 cookie
.get('secure') or False, expiry
, False, None, None, {})
1732 self
.cookiejar
.set_cookie(prepared_cookie
)
1733 elif autoscope
is True:
1734 self
.deprecated_feature(
1735 'Passing cookies as a header is a potential security risk; '
1736 'they will be scoped to the domain of the downloaded urls. '
1737 'Please consider loading cookies from a file or browser instead.')
1738 self
.__header
_cookies
.append(prepared_cookie
)
1740 self
.report_warning(
1741 'The extractor result contains an unscoped cookie as an HTTP header. '
1742 f
'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1744 self
._apply
_header
_cookies
(autoscope
, [prepared_cookie
])
1746 self
.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1747 tb
=False, is_error
=False)
1749 def _apply_header_cookies(self
, url
, cookies
=None):
1750 """Applies stray header cookies to the provided url
1752 This loads header cookies and scopes them to the domain provided in `url`.
1753 While this is not ideal, it helps reduce the risk of them being sent
1754 to an unintended destination while mostly maintaining compatibility.
1756 parsed
= urllib
.parse
.urlparse(url
)
1757 if not parsed
.hostname
:
1760 for cookie
in map(copy
.copy
, cookies
or self
.__header
_cookies
):
1761 cookie
.domain
= f
'.{parsed.hostname}'
1762 self
.cookiejar
.set_cookie(cookie
)
1764 @_handle_extraction_exceptions
1765 def __extract_info(self
, url
, ie
, download
, extra_info
, process
):
1766 self
._apply
_header
_cookies
(url
)
1769 ie_result
= ie
.extract(url
)
1770 except UserNotLive
as e
:
1772 if self
.params
.get('wait_for_video'):
1773 self
.report_warning(e
)
1774 self
._wait
_for
_video
()
1776 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1777 self
.report_warning(f
'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1779 if isinstance(ie_result
, list):
1780 # Backwards compatibility: old IE result format
1782 '_type': 'compat_list',
1783 'entries': ie_result
,
1785 if extra_info
.get('original_url'):
1786 ie_result
.setdefault('original_url', extra_info
['original_url'])
1787 self
.add_default_extra_info(ie_result
, ie
, url
)
1789 self
._wait
_for
_video
(ie_result
)
1790 return self
.process_ie_result(ie_result
, download
, extra_info
)
1794 def add_default_extra_info(self
, ie_result
, ie
, url
):
1796 self
.add_extra_info(ie_result
, {
1798 'original_url': url
,
1800 webpage_url
= ie_result
.get('webpage_url')
1802 self
.add_extra_info(ie_result
, {
1803 'webpage_url_basename': url_basename(webpage_url
),
1804 'webpage_url_domain': get_domain(webpage_url
),
1807 self
.add_extra_info(ie_result
, {
1808 'extractor': ie
.IE_NAME
,
1809 'extractor_key': ie
.ie_key(),
1812 def process_ie_result(self
, ie_result
, download
=True, extra_info
=None):
1814 Take the result of the ie (may be modified) and resolve all unresolved
1815 references (URLs, playlist items).
1817 It will also download the videos if 'download' is true.
1818 Returns the resolved ie_result.
1820 if extra_info
is None:
1822 result_type
= ie_result
.get('_type', 'video')
1824 if result_type
in ('url', 'url_transparent'):
1825 ie_result
['url'] = sanitize_url(
1826 ie_result
['url'], scheme
='http' if self
.params
.get('prefer_insecure') else 'https')
1827 if ie_result
.get('original_url') and not extra_info
.get('original_url'):
1828 extra_info
= {'original_url': ie_result
['original_url'], **extra_info
}
1830 extract_flat
= self
.params
.get('extract_flat', False)
1831 if ((extract_flat
== 'in_playlist' and 'playlist' in extra_info
)
1832 or extract_flat
is True):
1833 info_copy
= ie_result
.copy()
1834 ie
= try_get(ie_result
.get('ie_key'), self
.get_info_extractor
)
1835 if ie
and not ie_result
.get('id'):
1836 info_copy
['id'] = ie
.get_temp_id(ie_result
['url'])
1837 self
.add_default_extra_info(info_copy
, ie
, ie_result
['url'])
1838 self
.add_extra_info(info_copy
, extra_info
)
1839 info_copy
, _
= self
.pre_process(info_copy
)
1840 self
._fill
_common
_fields
(info_copy
, False)
1841 self
.__forced
_printings
(info_copy
)
1842 self
._raise
_pending
_errors
(info_copy
)
1843 if self
.params
.get('force_write_download_archive', False):
1844 self
.record_download_archive(info_copy
)
1847 if result_type
== 'video':
1848 self
.add_extra_info(ie_result
, extra_info
)
1849 ie_result
= self
.process_video_result(ie_result
, download
=download
)
1850 self
._raise
_pending
_errors
(ie_result
)
1851 additional_urls
= (ie_result
or {}).get('additional_urls')
1853 # TODO: Improve MetadataParserPP to allow setting a list
1854 if isinstance(additional_urls
, str):
1855 additional_urls
= [additional_urls
]
1857 '[info] {}: {} additional URL(s) requested'.format(ie_result
['id'], len(additional_urls
)))
1858 self
.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls
)))
1859 ie_result
['additional_entries'] = [
1861 url
, download
, extra_info
=extra_info
,
1862 force_generic_extractor
=self
.params
.get('force_generic_extractor'))
1863 for url
in additional_urls
1866 elif result_type
== 'url':
1867 # We have to add extra_info to the results because it may be
1868 # contained in a playlist
1869 return self
.extract_info(
1870 ie_result
['url'], download
,
1871 ie_key
=ie_result
.get('ie_key'),
1872 extra_info
=extra_info
)
1873 elif result_type
== 'url_transparent':
1874 # Use the information from the embedding page
1875 info
= self
.extract_info(
1876 ie_result
['url'], ie_key
=ie_result
.get('ie_key'),
1877 extra_info
=extra_info
, download
=False, process
=False)
1879 # extract_info may return None when ignoreerrors is enabled and
1880 # extraction failed with an error, don't crash and return early
1885 exempted_fields
= {'_type', 'url', 'ie_key'}
1886 if not ie_result
.get('section_end') and ie_result
.get('section_start') is None:
1887 # For video clips, the id etc of the clip extractor should be used
1888 exempted_fields |
= {'id', 'extractor', 'extractor_key'}
1890 new_result
= info
.copy()
1891 new_result
.update(filter_dict(ie_result
, lambda k
, v
: v
is not None and k
not in exempted_fields
))
1893 # Extracted info may not be a video result (i.e.
1894 # info.get('_type', 'video') != video) but rather an url or
1895 # url_transparent. In such cases outer metadata (from ie_result)
1896 # should be propagated to inner one (info). For this to happen
1897 # _type of info should be overridden with url_transparent. This
1898 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1899 if new_result
.get('_type') == 'url':
1900 new_result
['_type'] = 'url_transparent'
1902 return self
.process_ie_result(
1903 new_result
, download
=download
, extra_info
=extra_info
)
1904 elif result_type
in ('playlist', 'multi_video'):
1905 # Protect from infinite recursion due to recursively nested playlists
1906 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1907 webpage_url
= ie_result
.get('webpage_url') # Playlists may not have webpage_url
1908 if webpage_url
and webpage_url
in self
._playlist
_urls
:
1910 '[download] Skipping already downloaded playlist: {}'.format(
1911 ie_result
.get('title')) or ie_result
.get('id'))
1914 self
._playlist
_level
+= 1
1915 self
._playlist
_urls
.add(webpage_url
)
1916 self
._fill
_common
_fields
(ie_result
, False)
1917 self
._sanitize
_thumbnails
(ie_result
)
1919 return self
.__process
_playlist
(ie_result
, download
)
1921 self
._playlist
_level
-= 1
1922 if not self
._playlist
_level
:
1923 self
._playlist
_urls
.clear()
1924 elif result_type
== 'compat_list':
1925 self
.report_warning(
1926 'Extractor {} returned a compat_list result. '
1927 'It needs to be updated.'.format(ie_result
.get('extractor')))
1930 self
.add_extra_info(r
, {
1931 'extractor': ie_result
['extractor'],
1932 'webpage_url': ie_result
['webpage_url'],
1933 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1934 'webpage_url_domain': get_domain(ie_result
['webpage_url']),
1935 'extractor_key': ie_result
['extractor_key'],
1938 ie_result
['entries'] = [
1939 self
.process_ie_result(_fixup(r
), download
, extra_info
)
1940 for r
in ie_result
['entries']
1944 raise Exception(f
'Invalid result type: {result_type}')
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for `path` and return its result.

    self.report_error is handed over as make_dir's second argument.
    NOTE(review): presumably make_dir uses it to report failures and returns
    a truthy value on success -- confirm against make_dir's definition.
    """
    error_reporter = self.report_error
    return make_dir(path, error_reporter)
1950 def _playlist_infodict(ie_result
, strict
=False, **kwargs
):
1952 'playlist_count': ie_result
.get('playlist_count'),
1953 'playlist': ie_result
.get('title') or ie_result
.get('id'),
1954 'playlist_id': ie_result
.get('id'),
1955 'playlist_title': ie_result
.get('title'),
1956 'playlist_uploader': ie_result
.get('uploader'),
1957 'playlist_uploader_id': ie_result
.get('uploader_id'),
1958 'playlist_channel': ie_result
.get('channel'),
1959 'playlist_channel_id': ie_result
.get('channel_id'),
1960 'playlist_webpage_url': ie_result
.get('webpage_url'),
1965 if ie_result
.get('webpage_url'):
1967 'webpage_url': ie_result
['webpage_url'],
1968 'webpage_url_basename': url_basename(ie_result
['webpage_url']),
1969 'webpage_url_domain': get_domain(ie_result
['webpage_url']),
1973 'playlist_index': 0,
1974 '__last_playlist_index': max(ie_result
.get('requested_entries') or (0, 0)),
1975 'extractor': ie_result
['extractor'],
1976 'extractor_key': ie_result
['extractor_key'],
1979 def __process_playlist(self
, ie_result
, download
):
1980 """Process each entry in the playlist"""
1981 assert ie_result
['_type'] in ('playlist', 'multi_video')
1983 common_info
= self
._playlist
_infodict
(ie_result
, strict
=True)
1984 title
= common_info
.get('playlist') or '<Untitled>'
1985 if self
._match
_entry
(common_info
, incomplete
=True) is not None:
1987 self
.to_screen(f
'[download] Downloading {ie_result["_type"]}: {title}')
1989 all_entries
= PlaylistEntries(self
, ie_result
)
1990 entries
= orderedSet(all_entries
.get_requested_items(), lazy
=True)
1992 lazy
= self
.params
.get('lazy_playlist')
1994 resolved_entries
, n_entries
= [], 'N/A'
1995 ie_result
['requested_entries'], ie_result
['entries'] = None, None
1997 entries
= resolved_entries
= list(entries
)
1998 n_entries
= len(resolved_entries
)
1999 ie_result
['requested_entries'], ie_result
['entries'] = tuple(zip(*resolved_entries
)) or ([], [])
2000 if not ie_result
.get('playlist_count'):
2001 # Better to do this after potentially exhausting entries
2002 ie_result
['playlist_count'] = all_entries
.get_full_count()
2004 extra
= self
._playlist
_infodict
(ie_result
, n_entries
=int_or_none(n_entries
))
2005 ie_copy
= collections
.ChainMap(ie_result
, extra
)
2007 _infojson_written
= False
2008 write_playlist_files
= self
.params
.get('allow_playlist_files', True)
2009 if write_playlist_files
and self
.params
.get('list_thumbnails'):
2010 self
.list_thumbnails(ie_result
)
2011 if write_playlist_files
and not self
.params
.get('simulate'):
2012 _infojson_written
= self
._write
_info
_json
(
2013 'playlist', ie_result
, self
.prepare_filename(ie_copy
, 'pl_infojson'))
2014 if _infojson_written
is None:
2016 if self
._write
_description
('playlist', ie_result
,
2017 self
.prepare_filename(ie_copy
, 'pl_description')) is None:
2019 # TODO: This should be passed to ThumbnailsConvertor if necessary
2020 self
._write
_thumbnails
('playlist', ie_result
, self
.prepare_filename(ie_copy
, 'pl_thumbnail'))
2023 if self
.params
.get('playlistreverse') or self
.params
.get('playlistrandom'):
2024 self
.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once
=True)
2025 elif self
.params
.get('playlistreverse'):
2027 elif self
.params
.get('playlistrandom'):
2028 random
.shuffle(entries
)
2030 self
.to_screen(f
'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
2031 f
'{format_field(ie_result, "playlist_count", " of %s")}')
2033 keep_resolved_entries
= self
.params
.get('extract_flat') != 'discard'
2034 if self
.params
.get('extract_flat') == 'discard_in_playlist':
2035 keep_resolved_entries
= ie_result
['_type'] != 'playlist'
2036 if keep_resolved_entries
:
2037 self
.write_debug('The information of all playlist entries will be held in memory')
2040 max_failures
= self
.params
.get('skip_playlist_after_errors') or float('inf')
2041 for i
, (playlist_index
, entry
) in enumerate(entries
):
2043 resolved_entries
.append((playlist_index
, entry
))
2047 entry
['__x_forwarded_for_ip'] = ie_result
.get('__x_forwarded_for_ip')
2048 if not lazy
and 'playlist-index' in self
.params
['compat_opts']:
2049 playlist_index
= ie_result
['requested_entries'][i
]
2051 entry_copy
= collections
.ChainMap(entry
, {
2053 'n_entries': int_or_none(n_entries
),
2054 'playlist_index': playlist_index
,
2055 'playlist_autonumber': i
+ 1,
2058 if self
._match
_entry
(entry_copy
, incomplete
=True) is not None:
2059 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
2060 resolved_entries
[i
] = (playlist_index
, NO_DEFAULT
)
2064 f
'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
2065 f
'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
2067 entry_result
= self
.__process
_iterable
_entry
(entry
, download
, collections
.ChainMap({
2068 'playlist_index': playlist_index
,
2069 'playlist_autonumber': i
+ 1,
2071 if not entry_result
:
2073 if failures
>= max_failures
:
2075 f
'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2077 if keep_resolved_entries
:
2078 resolved_entries
[i
] = (playlist_index
, entry_result
)
2080 # Update with processed data
2081 ie_result
['entries'] = [e
for _
, e
in resolved_entries
if e
is not NO_DEFAULT
]
2082 ie_result
['requested_entries'] = [i
for i
, e
in resolved_entries
if e
is not NO_DEFAULT
]
2083 if ie_result
['requested_entries'] == try_call(lambda: list(range(1, ie_result
['playlist_count'] + 1))):
2084 # Do not set for full playlist
2085 ie_result
.pop('requested_entries')
2087 # Write the updated info to json
2088 if _infojson_written
is True and self
._write
_info
_json
(
2089 'updated playlist', ie_result
,
2090 self
.prepare_filename(ie_copy
, 'pl_infojson'), overwrite
=True) is None:
2093 ie_result
= self
.run_all_pps('playlist', ie_result
)
2094 self
.to_screen(f
'[download] Finished downloading playlist: {title}')
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one iterable (playlist) entry through process_ie_result.

    Exists as a separate method so that the call is wrapped by
    _handle_extraction_exceptions on a per-entry basis.
    NOTE(review): the exact error handling depends on that decorator,
    whose definition is not fully visible here -- confirm.
    """
    forwarded = {'download': download, 'extra_info': extra_info}
    return self.process_ie_result(entry, **forwarded)
2102 def _build_format_filter(self
, filter_spec
):
2103 " Returns a function to filter the formats according to the filter_spec "
2113 operator_rex
= re
.compile(r
'''(?x)\s*
2115 (?P<op>{})(?P<none_inclusive>\s*\?)?\s*
2116 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2117 '''.format('|'.join(map(re
.escape
, OPERATORS
.keys()))))
2118 m
= operator_rex
.fullmatch(filter_spec
)
2121 comparison_value
= int(m
.group('value'))
2123 comparison_value
= parse_filesize(m
.group('value'))
2124 if comparison_value
is None:
2125 comparison_value
= parse_filesize(m
.group('value') + 'B')
2126 if comparison_value
is None:
2128 'Invalid value {!r} in format specification {!r}'.format(
2129 m
.group('value'), filter_spec
))
2130 op
= OPERATORS
[m
.group('op')]
2135 '^=': lambda attr
, value
: attr
.startswith(value
),
2136 '$=': lambda attr
, value
: attr
.endswith(value
),
2137 '*=': lambda attr
, value
: value
in attr
,
2138 '~=': lambda attr
, value
: value
.search(attr
) is not None,
2140 str_operator_rex
= re
.compile(r
'''(?x)\s*
2141 (?P<key>[a-zA-Z0-9._-]+)\s*
2142 (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
2144 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2145 (?(quote)(?P=quote))\s*
2146 '''.format('|'.join(map(re
.escape
, STR_OPERATORS
.keys()))))
2147 m
= str_operator_rex
.fullmatch(filter_spec
)
2149 if m
.group('op') == '~=':
2150 comparison_value
= re
.compile(m
.group('value'))
2152 comparison_value
= re
.sub(r
'''\\([\\"'])''', r
'\1', m
.group('value'))
2153 str_op
= STR_OPERATORS
[m
.group('op')]
2154 if m
.group('negation'):
2155 op
= lambda attr
, value
: not str_op(attr
, value
)
2160 raise SyntaxError(f
'Invalid filter specification {filter_spec!r}')
2163 actual_value
= f
.get(m
.group('key'))
2164 if actual_value
is None:
2165 return m
.group('none_inclusive')
2166 return op(actual_value
, comparison_value
)
2169 def _check_formats(self
, formats
):
2171 working
= f
.get('__working')
2172 if working
is not None:
2176 self
.to_screen('[info] Testing format {}'.format(f
['format_id']))
2177 path
= self
.get_output_path('temp')
2178 if not self
._ensure
_dir
_exists
(f
'{path}/'):
2180 temp_file
= tempfile
.NamedTemporaryFile(suffix
='.tmp', delete
=False, dir=path
or None)
2183 success
, _
= self
.dl(temp_file
.name
, f
, test
=True)
2184 except (DownloadError
, OSError, ValueError, *network_exceptions
):
2187 if os
.path
.exists(temp_file
.name
):
2189 os
.remove(temp_file
.name
)
2191 self
.report_warning(f
'Unable to delete temporary file "{temp_file.name}"')
2192 f
['__working'] = success
2196 self
.to_screen('[info] Unable to download format {}. Skipping...'.format(f
['format_id']))
2198 def _select_formats(self
, formats
, selector
):
2199 return list(selector({
2201 'has_merged_format': any('none' not in (f
.get('acodec'), f
.get('vcodec')) for f
in formats
),
2202 'incomplete_formats': (all(f
.get('vcodec') == 'none' for f
in formats
) # No formats with video
2203 or all(f
.get('acodec') == 'none' for f
in formats
)), # OR, No formats with audio
2206 def _default_format_spec(self
, info_dict
):
2208 self
.params
['outtmpl']['default'] == '-'
2209 or (info_dict
.get('is_live') and not self
.params
.get('live_from_start')))
2212 merger
= FFmpegMergerPP(self
)
2213 return merger
.available
and merger
.can_merge()
2215 if not prefer_best
and not can_merge():
2217 formats
= self
._get
_formats
(info_dict
)
2218 evaluate_formats
= lambda spec
: self
._select
_formats
(formats
, self
.build_format_selector(spec
))
2219 if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
2220 self
.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
2221 'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
2223 compat
= (self
.params
.get('allow_multiple_audio_streams')
2224 or 'format-spec' in self
.params
['compat_opts'])
2226 return ('best/bestvideo+bestaudio' if prefer_best
2227 else 'bestvideo+bestaudio/best' if compat
2228 else 'bestvideo*+bestaudio/best')
2230 def build_format_selector(self
, format_spec
):
2231 def syntax_error(note
, start
):
2233 'Invalid format specification: '
2234 '{}\n\t{}\n\t{}^'.format(note
, format_spec
, ' ' * start
[1]))
2235 return SyntaxError(message
)
2237 PICKFIRST
= 'PICKFIRST'
2241 FormatSelector
= collections
.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2243 allow_multiple_streams
= {'audio': self
.params
.get('allow_multiple_audio_streams', False),
2244 'video': self
.params
.get('allow_multiple_video_streams', False)}
2246 def _parse_filter(tokens
):
2248 for type_
, string_
, _start
, _
, _
in tokens
:
2249 if type_
== tokenize
.OP
and string_
== ']':
2250 return ''.join(filter_parts
)
2252 filter_parts
.append(string_
)
2254 def _remove_unused_ops(tokens
):
2255 # Remove operators that we don't use and join them with the surrounding strings.
2256 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2257 ALLOWED_OPS
= ('/', '+', ',', '(', ')')
2258 last_string
, last_start
, last_end
, last_line
= None, None, None, None
2259 for type_
, string_
, start
, end
, line
in tokens
:
2260 if type_
== tokenize
.OP
and string_
== '[':
2262 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
2264 yield type_
, string_
, start
, end
, line
2265 # everything inside brackets will be handled by _parse_filter
2266 for type_
, string_
, start
, end
, line
in tokens
:
2267 yield type_
, string_
, start
, end
, line
2268 if type_
== tokenize
.OP
and string_
== ']':
2270 elif type_
== tokenize
.OP
and string_
in ALLOWED_OPS
:
2272 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
2274 yield type_
, string_
, start
, end
, line
2275 elif type_
in [tokenize
.NAME
, tokenize
.NUMBER
, tokenize
.OP
]:
2277 last_string
= string_
2281 last_string
+= string_
2283 yield tokenize
.NAME
, last_string
, last_start
, last_end
, last_line
2285 def _parse_format_selection(tokens
, inside_merge
=False, inside_choice
=False, inside_group
=False):
2287 current_selector
= None
2288 for type_
, string_
, start
, _
, _
in tokens
:
2289 # ENCODING is only defined in Python 3.x
2290 if type_
== getattr(tokenize
, 'ENCODING', None):
2292 elif type_
in [tokenize
.NAME
, tokenize
.NUMBER
]:
2293 current_selector
= FormatSelector(SINGLE
, string_
, [])
2294 elif type_
== tokenize
.OP
:
2296 if not inside_group
:
2297 # ')' will be handled by the parentheses group
2298 tokens
.restore_last_token()
2300 elif inside_merge
and string_
in ['/', ',']:
2301 tokens
.restore_last_token()
2303 elif inside_choice
and string_
== ',':
2304 tokens
.restore_last_token()
2306 elif string_
== ',':
2307 if not current_selector
:
2308 raise syntax_error('"," must follow a format selector', start
)
2309 selectors
.append(current_selector
)
2310 current_selector
= None
2311 elif string_
== '/':
2312 if not current_selector
:
2313 raise syntax_error('"/" must follow a format selector', start
)
2314 first_choice
= current_selector
2315 second_choice
= _parse_format_selection(tokens
, inside_choice
=True)
2316 current_selector
= FormatSelector(PICKFIRST
, (first_choice
, second_choice
), [])
2317 elif string_
== '[':
2318 if not current_selector
:
2319 current_selector
= FormatSelector(SINGLE
, 'best', [])
2320 format_filter
= _parse_filter(tokens
)
2321 current_selector
.filters
.append(format_filter
)
2322 elif string_
== '(':
2323 if current_selector
:
2324 raise syntax_error('Unexpected "("', start
)
2325 group
= _parse_format_selection(tokens
, inside_group
=True)
2326 current_selector
= FormatSelector(GROUP
, group
, [])
2327 elif string_
== '+':
2328 if not current_selector
:
2329 raise syntax_error('Unexpected "+"', start
)
2330 selector_1
= current_selector
2331 selector_2
= _parse_format_selection(tokens
, inside_merge
=True)
2333 raise syntax_error('Expected a selector', start
)
2334 current_selector
= FormatSelector(MERGE
, (selector_1
, selector_2
), [])
2336 raise syntax_error(f
'Operator not recognized: "{string_}"', start
)
2337 elif type_
== tokenize
.ENDMARKER
:
2339 if current_selector
:
2340 selectors
.append(current_selector
)
2343 def _merge(formats_pair
):
2344 format_1
, format_2
= formats_pair
2347 formats_info
.extend(format_1
.get('requested_formats', (format_1
,)))
2348 formats_info
.extend(format_2
.get('requested_formats', (format_2
,)))
2350 if not allow_multiple_streams
['video'] or not allow_multiple_streams
['audio']:
2351 get_no_more
= {'video': False, 'audio': False}
2352 for (i
, fmt_info
) in enumerate(formats_info
):
2353 if fmt_info
.get('acodec') == fmt_info
.get('vcodec') == 'none':
2356 for aud_vid
in ['audio', 'video']:
2357 if not allow_multiple_streams
[aud_vid
] and fmt_info
.get(aud_vid
[0] + 'codec') != 'none':
2358 if get_no_more
[aud_vid
]:
2361 get_no_more
[aud_vid
] = True
2363 if len(formats_info
) == 1:
2364 return formats_info
[0]
2366 video_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('vcodec') != 'none']
2367 audio_fmts
= [fmt_info
for fmt_info
in formats_info
if fmt_info
.get('acodec') != 'none']
2369 the_only_video
= video_fmts
[0] if len(video_fmts
) == 1 else None
2370 the_only_audio
= audio_fmts
[0] if len(audio_fmts
) == 1 else None
2372 output_ext
= get_compatible_ext(
2373 vcodecs
=[f
.get('vcodec') for f
in video_fmts
],
2374 acodecs
=[f
.get('acodec') for f
in audio_fmts
],
2375 vexts
=[f
['ext'] for f
in video_fmts
],
2376 aexts
=[f
['ext'] for f
in audio_fmts
],
2377 preferences
=(try_call(lambda: self
.params
['merge_output_format'].split('/'))
2378 or (self
.params
.get('prefer_free_formats') and ('webm', 'mkv'))))
2380 filtered
= lambda *keys
: filter(None, (traverse_obj(fmt
, *keys
) for fmt
in formats_info
))
2383 'requested_formats': formats_info
,
2384 'format': '+'.join(filtered('format')),
2385 'format_id': '+'.join(filtered('format_id')),
2387 'protocol': '+'.join(map(determine_protocol
, formats_info
)),
2388 'language': '+'.join(orderedSet(filtered('language'))) or None,
2389 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2390 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2391 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2396 'width': the_only_video
.get('width'),
2397 'height': the_only_video
.get('height'),
2398 'resolution': the_only_video
.get('resolution') or self
.format_resolution(the_only_video
),
2399 'fps': the_only_video
.get('fps'),
2400 'dynamic_range': the_only_video
.get('dynamic_range'),
2401 'vcodec': the_only_video
.get('vcodec'),
2402 'vbr': the_only_video
.get('vbr'),
2403 'stretched_ratio': the_only_video
.get('stretched_ratio'),
2404 'aspect_ratio': the_only_video
.get('aspect_ratio'),
2409 'acodec': the_only_audio
.get('acodec'),
2410 'abr': the_only_audio
.get('abr'),
2411 'asr': the_only_audio
.get('asr'),
2412 'audio_channels': the_only_audio
.get('audio_channels'),
2417 def _check_formats(formats
):
2418 if self
.params
.get('check_formats') == 'selected':
2419 yield from self
._check
_formats
(formats
)
2421 elif (self
.params
.get('check_formats') is not None
2422 or self
.params
.get('allow_unplayable_formats')):
2427 if f
.get('has_drm') or f
.get('__needs_testing'):
2428 yield from self
._check
_formats
([f
])
2432 def _build_selector_function(selector
):
2433 if isinstance(selector
, list): # ,
2434 fs
= [_build_selector_function(s
) for s
in selector
]
2436 def selector_function(ctx
):
2439 return selector_function
2441 elif selector
.type == GROUP
: # ()
2442 selector_function
= _build_selector_function(selector
.selector
)
2444 elif selector
.type == PICKFIRST
: # /
2445 fs
= [_build_selector_function(s
) for s
in selector
.selector
]
2447 def selector_function(ctx
):
2449 picked_formats
= list(f(ctx
))
2451 return picked_formats
2454 elif selector
.type == MERGE
: # +
2455 selector_1
, selector_2
= map(_build_selector_function
, selector
.selector
)
2457 def selector_function(ctx
):
2458 for pair
in itertools
.product(selector_1(ctx
), selector_2(ctx
)):
2461 elif selector
.type == SINGLE
: # atom
2462 format_spec
= selector
.selector
or 'best'
2464 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2465 if format_spec
== 'all':
2466 def selector_function(ctx
):
2467 yield from _check_formats(ctx
['formats'][::-1])
2468 elif format_spec
== 'mergeall':
2469 def selector_function(ctx
):
2470 formats
= list(_check_formats(
2471 f
for f
in ctx
['formats'] if f
.get('vcodec') != 'none' or f
.get('acodec') != 'none'))
2474 merged_format
= formats
[-1]
2475 for f
in formats
[-2::-1]:
2476 merged_format
= _merge((merged_format
, f
))
2480 format_fallback
, seperate_fallback
, format_reverse
, format_idx
= False, None, True, 1
2482 r
'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2484 if mobj
is not None:
2485 format_idx
= int_or_none(mobj
.group('n'), default
=1)
2486 format_reverse
= mobj
.group('bw')[0] == 'b'
2487 format_type
= (mobj
.group('type') or [None])[0]
2488 not_format_type
= {'v': 'a', 'a': 'v'}.get(format_type
)
2489 format_modified
= mobj
.group('mod') is not None
2491 format_fallback
= not format_type
and not format_modified
# for b, w
2493 (lambda f
: f
.get(f
'{format_type}codec') != 'none')
2494 if format_type
and format_modified
# bv*, ba*, wv*, wa*
2495 else (lambda f
: f
.get(f
'{not_format_type}codec') == 'none')
2496 if format_type
# bv, ba, wv, wa
2497 else (lambda f
: f
.get('vcodec') != 'none' and f
.get('acodec') != 'none')
2498 if not format_modified
# b, w
2499 else lambda f
: True) # b*, w*
2500 filter_f
= lambda f
: _filter_f(f
) and (
2501 f
.get('vcodec') != 'none' or f
.get('acodec') != 'none')
2503 if format_spec
in self
._format
_selection
_exts
['audio']:
2504 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') != 'none'
2505 elif format_spec
in self
._format
_selection
_exts
['video']:
2506 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') != 'none' and f
.get('vcodec') != 'none'
2507 seperate_fallback
= lambda f
: f
.get('ext') == format_spec
and f
.get('vcodec') != 'none'
2508 elif format_spec
in self
._format
_selection
_exts
['storyboards']:
2509 filter_f
= lambda f
: f
.get('ext') == format_spec
and f
.get('acodec') == 'none' and f
.get('vcodec') == 'none'
2511 filter_f
= lambda f
: f
.get('format_id') == format_spec
# id
2513 def selector_function(ctx
):
2514 formats
= list(ctx
['formats'])
2515 matches
= list(filter(filter_f
, formats
)) if filter_f
is not None else formats
2517 if format_fallback
and ctx
['incomplete_formats']:
2518 # for extractors with incomplete formats (audio only (soundcloud)
2519 # or video only (imgur)) best/worst will fall back to
2520 # best/worst {video,audio}-only format
2521 matches
= list(filter(lambda f
: f
.get('vcodec') != 'none' or f
.get('acodec') != 'none', formats
))
2522 elif seperate_fallback
and not ctx
['has_merged_format']:
2523 # for compatibility with youtube-dl when there is no pre-merged format
2524 matches
= list(filter(seperate_fallback
, formats
))
2525 matches
= LazyList(_check_formats(matches
[::-1 if format_reverse
else 1]))
2527 yield matches
[format_idx
- 1]
2528 except LazyList
.IndexError:
2531 filters
= [self
._build
_format
_filter
(f
) for f
in selector
.filters
]
2533 def final_selector(ctx
):
2534 ctx_copy
= dict(ctx
)
2535 for _filter
in filters
:
2536 ctx_copy
['formats'] = list(filter(_filter
, ctx_copy
['formats']))
2537 return selector_function(ctx_copy
)
2538 return final_selector
2540 # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
2541 # Prefix numbers with random letters to avoid it being classified as a number
2542 # See: https://github.com/yt-dlp/yt-dlp/pulls/8797
2543 # TODO: Implement parser not reliant on tokenize.tokenize
2544 prefix
= ''.join(random
.choices(string
.ascii_letters
, k
=32))
2545 stream
= io
.BytesIO(re
.sub(r
'\d[_\d]*', rf
'{prefix}\g<0>', format_spec
).encode())
2547 tokens
= list(_remove_unused_ops(
2548 token
._replace
(string
=token
.string
.replace(prefix
, ''))
2549 for token
in tokenize
.tokenize(stream
.readline
)))
2550 except tokenize
.TokenError
:
2551 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec
)))
2553 class TokenIterator
:
2554 def __init__(self
, tokens
):
2555 self
.tokens
= tokens
2562 if self
.counter
>= len(self
.tokens
):
2564 value
= self
.tokens
[self
.counter
]
2570 def restore_last_token(self
):
2573 parsed_selector
= _parse_format_selection(iter(TokenIterator(tokens
)))
2574 return _build_selector_function(parsed_selector
)
2576 def _calc_headers(self
, info_dict
, load_cookies
=False):
2577 res
= HTTPHeaderDict(self
.params
['http_headers'], info_dict
.get('http_headers'))
2580 if load_cookies
: # For --load-info-json
2581 self
._load
_cookies
(res
.get('Cookie'), autoscope
=info_dict
['url']) # compat
2582 self
._load
_cookies
(info_dict
.get('cookies'), autoscope
=False)
2583 # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2584 # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2585 res
.pop('Cookie', None)
2586 cookies
= self
.cookiejar
.get_cookies_for_url(info_dict
['url'])
2588 encoder
= LenientSimpleCookie()
2590 for cookie
in cookies
:
2591 _
, value
= encoder
.value_encode(cookie
.value
)
2592 values
.append(f
'{cookie.name}={value}')
2594 values
.append(f
'Domain={cookie.domain}')
2596 values
.append(f
'Path={cookie.path}')
2598 values
.append('Secure')
2600 values
.append(f
'Expires={cookie.expires}')
2602 values
.append(f
'Version={cookie.version}')
2603 info_dict
['cookies'] = '; '.join(values
)
2605 if 'X-Forwarded-For' not in res
:
2606 x_forwarded_for_ip
= info_dict
.get('__x_forwarded_for_ip')
2607 if x_forwarded_for_ip
:
2608 res
['X-Forwarded-For'] = x_forwarded_for_ip
2612 def _calc_cookies(self
, url
):
2613 self
.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2614 return self
.cookiejar
.get_cookie_header(url
)
2616 def _sort_thumbnails(self
, thumbnails
):
2617 thumbnails
.sort(key
=lambda t
: (
2618 t
.get('preference') if t
.get('preference') is not None else -1,
2619 t
.get('width') if t
.get('width') is not None else -1,
2620 t
.get('height') if t
.get('height') is not None else -1,
2621 t
.get('id') if t
.get('id') is not None else '',
2624 def _sanitize_thumbnails(self
, info_dict
):
2625 thumbnails
= info_dict
.get('thumbnails')
2626 if thumbnails
is None:
2627 thumbnail
= info_dict
.get('thumbnail')
2629 info_dict
['thumbnails'] = thumbnails
= [{'url': thumbnail
}]
2633 def check_thumbnails(thumbnails
):
2634 for t
in thumbnails
:
2635 self
.to_screen(f
'[info] Testing thumbnail {t["id"]}')
2637 self
.urlopen(HEADRequest(t
['url']))
2638 except network_exceptions
as err
:
2639 self
.to_screen(f
'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2643 self
._sort
_thumbnails
(thumbnails
)
2644 for i
, t
in enumerate(thumbnails
):
2645 if t
.get('id') is None:
2647 if t
.get('width') and t
.get('height'):
2648 t
['resolution'] = '%dx%d' % (t
['width'], t
['height'])
2649 t
['url'] = sanitize_url(t
['url'])
2651 if self
.params
.get('check_formats') is True:
2652 info_dict
['thumbnails'] = LazyList(check_thumbnails(thumbnails
[::-1]), reverse
=True)
2654 info_dict
['thumbnails'] = thumbnails
2656 def _fill_common_fields(self
, info_dict
, final
=True):
2657 # TODO: move sanitization here
2659 title
= info_dict
['fulltitle'] = info_dict
.get('title')
2662 self
.write_debug('Extractor gave empty title. Creating a generic title')
2664 self
.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2665 info_dict
['title'] = f
'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2667 if info_dict
.get('duration') is not None:
2668 info_dict
['duration_string'] = formatSeconds(info_dict
['duration'])
2670 for ts_key
, date_key
in (
2671 ('timestamp', 'upload_date'),
2672 ('release_timestamp', 'release_date'),
2673 ('modified_timestamp', 'modified_date'),
2675 if info_dict
.get(date_key
) is None and info_dict
.get(ts_key
) is not None:
2676 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2677 # see http://bugs.python.org/issue1646728)
2678 with contextlib
.suppress(ValueError, OverflowError, OSError):
2679 upload_date
= dt
.datetime
.fromtimestamp(info_dict
[ts_key
], dt
.timezone
.utc
)
2680 info_dict
[date_key
] = upload_date
.strftime('%Y%m%d')
2682 if not info_dict
.get('release_year'):
2683 info_dict
['release_year'] = traverse_obj(info_dict
, ('release_date', {lambda x
: int(x
[:4])}))
2685 live_keys
= ('is_live', 'was_live')
2686 live_status
= info_dict
.get('live_status')
2687 if live_status
is None:
2688 for key
in live_keys
:
2689 if info_dict
.get(key
) is False:
2691 if info_dict
.get(key
):
2694 if all(info_dict
.get(key
) is False for key
in live_keys
):
2695 live_status
= 'not_live'
2697 info_dict
['live_status'] = live_status
2698 for key
in live_keys
:
2699 if info_dict
.get(key
) is None:
2700 info_dict
[key
] = (live_status
== key
)
2701 if live_status
== 'post_live':
2702 info_dict
['was_live'] = True
2704 # Auto generate title fields corresponding to the *_number fields when missing
2705 # in order to always have clean titles. This is very common for TV series.
2706 for field
in ('chapter', 'season', 'episode'):
2707 if final
and info_dict
.get(f
'{field}_number') is not None and not info_dict
.get(field
):
2708 info_dict
[field
] = '%s %d' % (field
.capitalize(), info_dict
[f
'{field}_number'])
2710 for old_key
, new_key
in self
._deprecated
_multivalue
_fields
.items():
2711 if new_key
in info_dict
and old_key
in info_dict
:
2712 if '_version' not in info_dict
: # HACK: Do not warn when using --load-info-json
2713 self
.deprecation_warning(f
'Do not return {old_key!r} when {new_key!r} is present')
2714 elif old_value
:= info_dict
.get(old_key
):
2715 info_dict
[new_key
] = old_value
.split(', ')
2716 elif new_value
:= info_dict
.get(new_key
):
2717 info_dict
[old_key
] = ', '.join(v
.replace(',', '\N{FULLWIDTH COMMA}') for v
in new_value
)
2719 def _raise_pending_errors(self
, info
):
2720 err
= info
.pop('__pending_error', None)
2722 self
.report_error(err
, tb
=False)
def sort_formats(self, info_dict):
    """Sort the formats obtained for `info_dict` in place.

    The list returned by `self._get_formats(info_dict)` is sorted with
    `FormatSorter(...).calculate_preference` as the key; the sorter is built
    from the `_format_sort_fields` entry of `info_dict` (an empty list when
    that entry is missing or falsy). Nothing is returned.
    """
    format_list = self._get_formats(info_dict)
    sort_fields = info_dict.get('_format_sort_fields') or []
    sorter = FormatSorter(self, sort_fields)
    format_list.sort(key=sorter.calculate_preference)
2729 def process_video_result(self
, info_dict
, download
=True):
2730 assert info_dict
.get('_type', 'video') == 'video'
2731 self
._num
_videos
+= 1
2733 if 'id' not in info_dict
:
2734 raise ExtractorError('Missing "id" field in extractor result', ie
=info_dict
['extractor'])
2735 elif not info_dict
.get('id'):
2736 raise ExtractorError('Extractor failed to obtain "id"', ie
=info_dict
['extractor'])
2738 def report_force_conversion(field
, field_not
, conversion
):
2739 self
.report_warning(
2740 f
'"{field}" field is not {field_not} - forcing {conversion} conversion, '
2741 'there is an error in extractor')
2743 def sanitize_string_field(info
, string_field
):
2744 field
= info
.get(string_field
)
2745 if field
is None or isinstance(field
, str):
2747 report_force_conversion(string_field
, 'a string', 'string')
2748 info
[string_field
] = str(field
)
2750 def sanitize_numeric_fields(info
):
2751 for numeric_field
in self
._NUMERIC
_FIELDS
:
2752 field
= info
.get(numeric_field
)
2753 if field
is None or isinstance(field
, (int, float)):
2755 report_force_conversion(numeric_field
, 'numeric', 'int')
2756 info
[numeric_field
] = int_or_none(field
)
2758 sanitize_string_field(info_dict
, 'id')
2759 sanitize_numeric_fields(info_dict
)
2760 if info_dict
.get('section_end') and info_dict
.get('section_start') is not None:
2761 info_dict
['duration'] = round(info_dict
['section_end'] - info_dict
['section_start'], 3)
2762 if (info_dict
.get('duration') or 0) <= 0 and info_dict
.pop('duration', None):
2763 self
.report_warning('"duration" field is negative, there is an error in extractor')
2765 chapters
= info_dict
.get('chapters') or []
2766 if chapters
and chapters
[0].get('start_time'):
2767 chapters
.insert(0, {'start_time': 0})
2769 dummy_chapter
= {'end_time': 0, 'start_time': info_dict
.get('duration')}
2770 for idx
, (prev
, current
, next_
) in enumerate(zip(
2771 (dummy_chapter
, *chapters
), chapters
, (*chapters
[1:], dummy_chapter
)), 1):
2772 if current
.get('start_time') is None:
2773 current
['start_time'] = prev
.get('end_time')
2774 if not current
.get('end_time'):
2775 current
['end_time'] = next_
.get('start_time')
2776 if not current
.get('title'):
2777 current
['title'] = f
'<Untitled Chapter {idx}>'
2779 if 'playlist' not in info_dict
:
2780 # It isn't part of a playlist
2781 info_dict
['playlist'] = None
2782 info_dict
['playlist_index'] = None
2784 self
._sanitize
_thumbnails
(info_dict
)
2786 thumbnail
= info_dict
.get('thumbnail')
2787 thumbnails
= info_dict
.get('thumbnails')
2789 info_dict
['thumbnail'] = sanitize_url(thumbnail
)
2791 info_dict
['thumbnail'] = thumbnails
[-1]['url']
2793 if info_dict
.get('display_id') is None and 'id' in info_dict
:
2794 info_dict
['display_id'] = info_dict
['id']
2796 self
._fill
_common
_fields
(info_dict
)
2798 for cc_kind
in ('subtitles', 'automatic_captions'):
2799 cc
= info_dict
.get(cc_kind
)
2801 for _
, subtitle
in cc
.items():
2802 for subtitle_format
in subtitle
:
2803 if subtitle_format
.get('url'):
2804 subtitle_format
['url'] = sanitize_url(subtitle_format
['url'])
2805 if subtitle_format
.get('ext') is None:
2806 subtitle_format
['ext'] = determine_ext(subtitle_format
['url']).lower()
2808 automatic_captions
= info_dict
.get('automatic_captions')
2809 subtitles
= info_dict
.get('subtitles')
2811 info_dict
['requested_subtitles'] = self
.process_subtitles(
2812 info_dict
['id'], subtitles
, automatic_captions
)
2814 formats
= self
._get
_formats
(info_dict
)
2816 # Backward compatibility with InfoExtractor._sort_formats
2817 field_preference
= (formats
or [{}])[0].pop('__sort_fields', None)
2818 if field_preference
:
2819 info_dict
['_format_sort_fields'] = field_preference
2821 info_dict
['_has_drm'] = any( # or None ensures --clean-infojson removes it
2822 f
.get('has_drm') and f
['has_drm'] != 'maybe' for f
in formats
) or None
2823 if not self
.params
.get('allow_unplayable_formats'):
2824 formats
= [f
for f
in formats
if not f
.get('has_drm') or f
['has_drm'] == 'maybe']
2826 if formats
and all(f
.get('acodec') == f
.get('vcodec') == 'none' for f
in formats
):
2827 self
.report_warning(
2828 f
'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2829 'only images are available for download. Use --list-formats to see them'.capitalize())
2831 get_from_start
= not info_dict
.get('is_live') or bool(self
.params
.get('live_from_start'))
2832 if not get_from_start
:
2833 info_dict
['title'] += ' ' + dt
.datetime
.now().strftime('%Y-%m-%d %H:%M')
2834 if info_dict
.get('is_live') and formats
:
2835 formats
= [f
for f
in formats
if bool(f
.get('is_from_start')) == get_from_start
]
2836 if get_from_start
and not formats
:
2837 self
.raise_no_formats(info_dict
, msg
=(
2838 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2839 'If you want to download from the current time, use --no-live-from-start'))
2841 def is_wellformed(f
):
2844 self
.report_warning(
2845 '"url" field is missing or empty - skipping format, '
2846 'there is an error in extractor')
2848 if isinstance(url
, bytes
):
2849 sanitize_string_field(f
, 'url')
2852 # Filter out malformed formats for better extraction robustness
2853 formats
= list(filter(is_wellformed
, formats
or []))
2856 self
.raise_no_formats(info_dict
)
2859 sanitize_string_field(fmt
, 'format_id')
2860 sanitize_numeric_fields(fmt
)
2861 fmt
['url'] = sanitize_url(fmt
['url'])
2862 FormatSorter
._fill
_sorting
_fields
(fmt
)
2863 if fmt
['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
2864 if fmt
.get('acodec') is None:
2865 fmt
['acodec'] = fmt
['ext']
2866 if fmt
.get('resolution') is None:
2867 fmt
['resolution'] = self
.format_resolution(fmt
, default
=None)
2868 if fmt
.get('dynamic_range') is None and fmt
.get('vcodec') != 'none':
2869 fmt
['dynamic_range'] = 'SDR'
2870 if fmt
.get('aspect_ratio') is None:
2871 fmt
['aspect_ratio'] = try_call(lambda: round(fmt
['width'] / fmt
['height'], 2))
2872 # For fragmented formats, "tbr" is often max bitrate and not average
2873 if (('manifest-filesize-approx' in self
.params
['compat_opts'] or not fmt
.get('manifest_url'))
2874 and not fmt
.get('filesize') and not fmt
.get('filesize_approx')):
2875 fmt
['filesize_approx'] = filesize_from_tbr(fmt
.get('tbr'), info_dict
.get('duration'))
2876 fmt
['http_headers'] = self
._calc
_headers
(collections
.ChainMap(fmt
, info_dict
), load_cookies
=True)
2878 # Safeguard against old/insecure infojson when using --load-info-json
2879 if info_dict
.get('http_headers'):
2880 info_dict
['http_headers'] = HTTPHeaderDict(info_dict
['http_headers'])
2881 info_dict
['http_headers'].pop('Cookie', None)
2883 # This is copied to http_headers by the above _calc_headers and can now be removed
2884 if '__x_forwarded_for_ip' in info_dict
:
2885 del info_dict
['__x_forwarded_for_ip']
2889 '_format_sort_fields': info_dict
.get('_format_sort_fields'),
2892 # Sanitize and group by format_id
2894 for i
, fmt
in enumerate(formats
):
2895 if not fmt
.get('format_id'):
2896 fmt
['format_id'] = str(i
)
2898 # Sanitize format_id from characters used in format selector expression
2899 fmt
['format_id'] = re
.sub(r
'[\s,/+\[\]()]', '_', fmt
['format_id'])
2900 formats_dict
.setdefault(fmt
['format_id'], []).append(fmt
)
2902 # Make sure all formats have unique format_id
2903 common_exts
= set(itertools
.chain(*self
._format
_selection
_exts
.values()))
2904 for format_id
, ambiguous_formats
in formats_dict
.items():
2905 ambigious_id
= len(ambiguous_formats
) > 1
2906 for i
, fmt
in enumerate(ambiguous_formats
):
2908 fmt
['format_id'] = f
'{format_id}-{i}'
2909 # Ensure there is no conflict between id and ext in format selection
2910 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2911 if fmt
['format_id'] != fmt
['ext'] and fmt
['format_id'] in common_exts
:
2912 fmt
['format_id'] = 'f{}'.format(fmt
['format_id'])
2914 if fmt
.get('format') is None:
2915 fmt
['format'] = '{id} - {res}{note}'.format(
2916 id=fmt
['format_id'],
2917 res
=self
.format_resolution(fmt
),
2918 note
=format_field(fmt
, 'format_note', ' (%s)'),
2921 if self
.params
.get('check_formats') is True:
2922 formats
= LazyList(self
._check
_formats
(formats
[::-1]), reverse
=True)
2924 if not formats
or formats
[0] is not info_dict
:
2925 # only set the 'formats' fields if the original info_dict list them
2926 # otherwise we end up with a circular reference, the first (and unique)
2927 # element in the 'formats' field in info_dict is info_dict itself,
2928 # which can't be exported to json
2929 info_dict
['formats'] = formats
2931 info_dict
, _
= self
.pre_process(info_dict
)
2933 if self
._match
_entry
(info_dict
, incomplete
=self
._format
_fields
) is not None:
2936 self
.post_extract(info_dict
)
2937 info_dict
, _
= self
.pre_process(info_dict
, 'after_filter')
2939 # The pre-processors may have modified the formats
2940 formats
= self
._get
_formats
(info_dict
)
2942 list_only
= self
.params
.get('simulate') == 'list_only'
2943 interactive_format_selection
= not list_only
and self
.format_selector
== '-'
2944 if self
.params
.get('list_thumbnails'):
2945 self
.list_thumbnails(info_dict
)
2946 if self
.params
.get('listsubtitles'):
2947 if 'automatic_captions' in info_dict
:
2948 self
.list_subtitles(
2949 info_dict
['id'], automatic_captions
, 'automatic captions')
2950 self
.list_subtitles(info_dict
['id'], subtitles
, 'subtitles')
2951 if self
.params
.get('listformats') or interactive_format_selection
:
2952 self
.list_formats(info_dict
)
2954 # Without this printing, -F --print-json will not work
2955 self
.__forced
_printings
(info_dict
)
2958 format_selector
= self
.format_selector
2960 if interactive_format_selection
:
2961 req_format
= input(self
._format
_screen
('\nEnter format selector ', self
.Styles
.EMPHASIS
)
2962 + '(Press ENTER for default, or Ctrl+C to quit)'
2963 + self
._format
_screen
(': ', self
.Styles
.EMPHASIS
))
2965 format_selector
= self
.build_format_selector(req_format
) if req_format
else None
2966 except SyntaxError as err
:
2967 self
.report_error(err
, tb
=False, is_error
=False)
2970 if format_selector
is None:
2971 req_format
= self
._default
_format
_spec
(info_dict
)
2972 self
.write_debug(f
'Default format spec: {req_format}')
2973 format_selector
= self
.build_format_selector(req_format
)
2975 formats_to_download
= self
._select
_formats
(formats
, format_selector
)
2976 if interactive_format_selection
and not formats_to_download
:
2977 self
.report_error('Requested format is not available', tb
=False, is_error
=False)
2981 if not formats_to_download
:
2982 if not self
.params
.get('ignore_no_formats_error'):
2983 raise ExtractorError(
2984 'Requested format is not available. Use --list-formats for a list of available formats',
2985 expected
=True, video_id
=info_dict
['id'], ie
=info_dict
['extractor'])
2986 self
.report_warning('Requested format is not available')
2987 # Process what we can, even without any available formats.
2988 formats_to_download
= [{}]
2990 requested_ranges
= tuple(self
.params
.get('download_ranges', lambda *_
: [{}])(info_dict
, self
))
2991 best_format
, downloaded_formats
= formats_to_download
[-1], []
2993 if best_format
and requested_ranges
:
2994 def to_screen(*msg
):
2995 self
.to_screen(f
'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2997 to_screen(f
'Downloading {len(formats_to_download)} format(s):',
2998 (f
['format_id'] for f
in formats_to_download
))
2999 if requested_ranges
!= ({}, ):
3000 to_screen(f
'Downloading {len(requested_ranges)} time ranges:',
3001 (f
'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c
in requested_ranges
))
3002 max_downloads_reached
= False
3004 for fmt
, chapter
in itertools
.product(formats_to_download
, requested_ranges
):
3005 new_info
= self
._copy
_infodict
(info_dict
)
3006 new_info
.update(fmt
)
3007 offset
, duration
= info_dict
.get('section_start') or 0, info_dict
.get('duration') or float('inf')
3008 end_time
= offset
+ min(chapter
.get('end_time', duration
), duration
)
3009 # duration may not be accurate. So allow deviations <1sec
3010 if end_time
== float('inf') or end_time
> offset
+ duration
+ 1:
3012 if chapter
or offset
:
3014 'section_start': offset
+ chapter
.get('start_time', 0),
3015 'section_end': end_time
,
3016 'section_title': chapter
.get('title'),
3017 'section_number': chapter
.get('index'),
3019 downloaded_formats
.append(new_info
)
3021 self
.process_info(new_info
)
3022 except MaxDownloadsReached
:
3023 max_downloads_reached
= True
3024 self
._raise
_pending
_errors
(new_info
)
3025 # Remove copied info
3026 for key
, val
in tuple(new_info
.items()):
3027 if info_dict
.get(key
) == val
:
3029 if max_downloads_reached
:
3032 write_archive
= {f
.get('__write_download_archive', False) for f
in downloaded_formats
}
3033 assert write_archive
.issubset({True, False, 'ignore'})
3034 if True in write_archive
and False not in write_archive
:
3035 self
.record_download_archive(info_dict
)
3037 info_dict
['requested_downloads'] = downloaded_formats
3038 info_dict
= self
.run_all_pps('after_video', info_dict
)
3039 if max_downloads_reached
:
3040 raise MaxDownloadsReached
3042 # We update the info dict with the selected best quality format (backwards compatibility)
3043 info_dict
.update(best_format
)
3046 def process_subtitles(self
, video_id
, normal_subtitles
, automatic_captions
):
3047 """Select the requested subtitles and their format"""
3048 available_subs
, normal_sub_langs
= {}, []
3049 if normal_subtitles
and self
.params
.get('writesubtitles'):
3050 available_subs
.update(normal_subtitles
)
3051 normal_sub_langs
= tuple(normal_subtitles
.keys())
3052 if automatic_captions
and self
.params
.get('writeautomaticsub'):
3053 for lang
, cap_info
in automatic_captions
.items():
3054 if lang
not in available_subs
:
3055 available_subs
[lang
] = cap_info
3057 if not available_subs
or (
3058 not self
.params
.get('writesubtitles')
3059 and not self
.params
.get('writeautomaticsub')):
3062 all_sub_langs
= tuple(available_subs
.keys())
3063 if self
.params
.get('allsubtitles', False):
3064 requested_langs
= all_sub_langs
3065 elif self
.params
.get('subtitleslangs', False):
3067 requested_langs
= orderedSet_from_options(
3068 self
.params
.get('subtitleslangs'), {'all': all_sub_langs
}, use_regex
=True)
3069 except re
.error
as e
:
3070 raise ValueError(f
'Wrong regex for subtitlelangs: {e.pattern}')
3072 requested_langs
= LazyList(itertools
.chain(
3073 ['en'] if 'en' in normal_sub_langs
else [],
3074 filter(lambda f
: f
.startswith('en'), normal_sub_langs
),
3075 ['en'] if 'en' in all_sub_langs
else [],
3076 filter(lambda f
: f
.startswith('en'), all_sub_langs
),
3077 normal_sub_langs
, all_sub_langs
,
3080 self
.to_screen(f
'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
3082 formats_query
= self
.params
.get('subtitlesformat', 'best')
3083 formats_preference
= formats_query
.split('/') if formats_query
else []
3085 for lang
in requested_langs
:
3086 formats
= available_subs
.get(lang
)
3088 self
.report_warning(f
'{lang} subtitles not available for {video_id}')
3090 for ext
in formats_preference
:
3094 matches
= list(filter(lambda f
: f
['ext'] == ext
, formats
))
3100 self
.report_warning(
3101 'No subtitle format found matching "{}" for language {}, '
3102 'using {}. Use --list-subs for a list of available subtitles'.format(formats_query
, lang
, f
['ext']))
3106 def _forceprint(self
, key
, info_dict
):
3107 if info_dict
is None:
3109 info_copy
= info_dict
.copy()
3110 info_copy
.setdefault('filename', self
.prepare_filename(info_dict
))
3111 if info_dict
.get('requested_formats') is not None:
3112 # For RTMP URLs, also include the playpath
3113 info_copy
['urls'] = '\n'.join(f
['url'] + f
.get('play_path', '') for f
in info_dict
['requested_formats'])
3114 elif info_dict
.get('url'):
3115 info_copy
['urls'] = info_dict
['url'] + info_dict
.get('play_path', '')
3116 info_copy
['formats_table'] = self
.render_formats_table(info_dict
)
3117 info_copy
['thumbnails_table'] = self
.render_thumbnails_table(info_dict
)
3118 info_copy
['subtitles_table'] = self
.render_subtitles_table(info_dict
.get('id'), info_dict
.get('subtitles'))
3119 info_copy
['automatic_captions_table'] = self
.render_subtitles_table(info_dict
.get('id'), info_dict
.get('automatic_captions'))
3121 def format_tmpl(tmpl
):
3122 mobj
= re
.fullmatch(r
'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl
)
3127 if tmpl
.startswith('{'):
3128 tmpl
, fmt
= f
'.{tmpl}', '%({})j'
3129 if tmpl
.endswith('='):
3130 tmpl
, fmt
= tmpl
[:-1], '{0} = %({0})#j'
3131 return '\n'.join(map(fmt
.format
, [tmpl
] if mobj
.group('dict') else tmpl
.split(',')))
3133 for tmpl
in self
.params
['forceprint'].get(key
, []):
3134 self
.to_stdout(self
.evaluate_outtmpl(format_tmpl(tmpl
), info_copy
))
3136 for tmpl
, file_tmpl
in self
.params
['print_to_file'].get(key
, []):
3137 filename
= self
.prepare_filename(info_dict
, outtmpl
=file_tmpl
)
3138 tmpl
= format_tmpl(tmpl
)
3139 self
.to_screen(f
'[info] Writing {tmpl!r} to: {filename}')
3140 if self
._ensure
_dir
_exists
(filename
):
3141 with
open(filename
, 'a', encoding
='utf-8', newline
='') as f
:
3142 f
.write(self
.evaluate_outtmpl(tmpl
, info_copy
) + os
.linesep
)
3146 def __forced_printings(self
, info_dict
, filename
=None, incomplete
=True):
3147 if (self
.params
.get('forcejson')
3148 or self
.params
['forceprint'].get('video')
3149 or self
.params
['print_to_file'].get('video')):
3150 self
.post_extract(info_dict
)
3152 info_dict
['filename'] = filename
3153 info_copy
= self
._forceprint
('video', info_dict
)
3155 def print_field(field
, actual_field
=None, optional
=False):
3156 if actual_field
is None:
3157 actual_field
= field
3158 if self
.params
.get(f
'force{field}') and (
3159 info_copy
.get(field
) is not None or (not optional
and not incomplete
)):
3160 self
.to_stdout(info_copy
[actual_field
])
3162 print_field('title')
3164 print_field('url', 'urls')
3165 print_field('thumbnail', optional
=True)
3166 print_field('description', optional
=True)
3167 print_field('filename')
3168 if self
.params
.get('forceduration') and info_copy
.get('duration') is not None:
3169 self
.to_stdout(formatSeconds(info_copy
['duration']))
3170 print_field('format')
3172 if self
.params
.get('forcejson'):
3173 self
.to_stdout(json
.dumps(self
.sanitize_info(info_dict
)))
3175 def dl(self
, name
, info
, subtitle
=False, test
=False):
3176 if not info
.get('url'):
3177 self
.raise_no_formats(info
, True)
3180 verbose
= self
.params
.get('verbose')
3181 quiet
= self
.params
.get('quiet') or not verbose
3186 'noprogress': quiet
,
3188 'skip_unavailable_fragments': False,
3189 'keep_fragments': False,
3191 '_no_ytdl_file': True,
3194 params
= self
.params
3195 fd
= get_suitable_downloader(info
, params
, to_stdout
=(name
== '-'))(self
, params
)
3197 for ph
in self
._progress
_hooks
:
3198 fd
.add_progress_hook(ph
)
3200 (f
['url'].split(',')[0] + ',<data>' if f
['url'].startswith('data:') else f
['url'])
3201 for f
in info
.get('requested_formats', []) or [info
])
3202 self
.write_debug(f
'Invoking {fd.FD_NAME} downloader on "{urls}"')
3204 # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
3205 # But it may contain objects that are not deep-copyable
3206 new_info
= self
._copy
_infodict
(info
)
3207 if new_info
.get('http_headers') is None:
3208 new_info
['http_headers'] = self
._calc
_headers
(new_info
)
3209 return fd
.download(name
, new_info
, subtitle
)
3211 def existing_file(self
, filepaths
, *, default_overwrite
=True):
3212 existing_files
= list(filter(os
.path
.exists
, orderedSet(filepaths
)))
3213 if existing_files
and not self
.params
.get('overwrites', default_overwrite
):
3214 return existing_files
[0]
3216 for file in existing_files
:
3217 self
.report_file_delete(file)
3221 @_catch_unsafe_extension_error
3222 def process_info(self
, info_dict
):
3223 """Process a single resolved IE result. (Modifies it in-place)"""
3225 assert info_dict
.get('_type', 'video') == 'video'
3226 original_infodict
= info_dict
3228 if 'format' not in info_dict
and 'ext' in info_dict
:
3229 info_dict
['format'] = info_dict
['ext']
3231 if self
._match
_entry
(info_dict
) is not None:
3232 info_dict
['__write_download_archive'] = 'ignore'
3235 # Does nothing under normal operation - for backward compatibility of process_info
3236 self
.post_extract(info_dict
)
3238 def replace_info_dict(new_info
):
3240 if new_info
== info_dict
:
3243 info_dict
.update(new_info
)
3245 new_info
, _
= self
.pre_process(info_dict
, 'video')
3246 replace_info_dict(new_info
)
3247 self
._num
_downloads
+= 1
3249 # info_dict['_filename'] needs to be set for backward compatibility
3250 info_dict
['_filename'] = full_filename
= self
.prepare_filename(info_dict
, warn
=True)
3251 temp_filename
= self
.prepare_filename(info_dict
, 'temp')
3255 self
.__forced
_printings
(info_dict
, full_filename
, incomplete
=('format' not in info_dict
))
3257 def check_max_downloads():
3258 if self
._num
_downloads
>= float(self
.params
.get('max_downloads') or 'inf'):
3259 raise MaxDownloadsReached
3261 if self
.params
.get('simulate'):
3262 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
3263 check_max_downloads()
3266 if full_filename
is None:
3268 if not self
._ensure
_dir
_exists
(full_filename
):
3270 if not self
._ensure
_dir
_exists
(temp_filename
):
3273 if self
._write
_description
('video', info_dict
,
3274 self
.prepare_filename(info_dict
, 'description')) is None:
3277 sub_files
= self
._write
_subtitles
(info_dict
, temp_filename
)
3278 if sub_files
is None:
3280 files_to_move
.update(dict(sub_files
))
3282 thumb_files
= self
._write
_thumbnails
(
3283 'video', info_dict
, temp_filename
, self
.prepare_filename(info_dict
, 'thumbnail'))
3284 if thumb_files
is None:
3286 files_to_move
.update(dict(thumb_files
))
3288 infofn
= self
.prepare_filename(info_dict
, 'infojson')
3289 _infojson_written
= self
._write
_info
_json
('video', info_dict
, infofn
)
3290 if _infojson_written
:
3291 info_dict
['infojson_filename'] = infofn
3292 # For backward compatibility, even though it was a private field
3293 info_dict
['__infojson_filename'] = infofn
3294 elif _infojson_written
is None:
3297 # Note: Annotations are deprecated
3299 if self
.params
.get('writeannotations', False):
3300 annofn
= self
.prepare_filename(info_dict
, 'annotation')
3302 if not self
._ensure
_dir
_exists
(annofn
):
3304 if not self
.params
.get('overwrites', True) and os
.path
.exists(annofn
):
3305 self
.to_screen('[info] Video annotations are already present')
3306 elif not info_dict
.get('annotations'):
3307 self
.report_warning('There are no annotations to write.')
3310 self
.to_screen('[info] Writing video annotations to: ' + annofn
)
3311 with
open(annofn
, 'w', encoding
='utf-8') as annofile
:
3312 annofile
.write(info_dict
['annotations'])
3313 except (KeyError, TypeError):
3314 self
.report_warning('There are no annotations to write.')
3316 self
.report_error('Cannot write annotations file: ' + annofn
)
3319 # Write internet shortcut files
3320 def _write_link_file(link_type
):
3321 url
= try_get(info_dict
['webpage_url'], iri_to_uri
)
3323 self
.report_warning(
3324 f
'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3326 linkfn
= replace_extension(self
.prepare_filename(info_dict
, 'link'), link_type
, info_dict
.get('ext'))
3327 if not self
._ensure
_dir
_exists
(linkfn
):
3329 if self
.params
.get('overwrites', True) and os
.path
.exists(linkfn
):
3330 self
.to_screen(f
'[info] Internet shortcut (.{link_type}) is already present')
3333 self
.to_screen(f
'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3334 with
open(to_high_limit_path(linkfn
), 'w', encoding
='utf-8',
3335 newline
='\r\n' if link_type
== 'url' else '\n') as linkfile
:
3336 template_vars
= {'url': url
}
3337 if link_type
== 'desktop':
3338 template_vars
['filename'] = linkfn
[:-(len(link_type
) + 1)]
3339 linkfile
.write(LINK_TEMPLATES
[link_type
] % template_vars
)
3341 self
.report_error(f
'Cannot write internet shortcut {linkfn}')
3346 'url': self
.params
.get('writeurllink'),
3347 'webloc': self
.params
.get('writewebloclink'),
3348 'desktop': self
.params
.get('writedesktoplink'),
3350 if self
.params
.get('writelink'):
3351 link_type
= ('webloc' if sys
.platform
== 'darwin'
3352 else 'desktop' if sys
.platform
.startswith('linux')
3354 write_links
[link_type
] = True
3356 if any(should_write
and not _write_link_file(link_type
)
3357 for link_type
, should_write
in write_links
.items()):
3360 new_info
, files_to_move
= self
.pre_process(info_dict
, 'before_dl', files_to_move
)
3361 replace_info_dict(new_info
)
3363 if self
.params
.get('skip_download'):
3364 info_dict
['filepath'] = temp_filename
3365 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(full_filename
))
3366 info_dict
['__files_to_move'] = files_to_move
3367 replace_info_dict(self
.run_pp(MoveFilesAfterDownloadPP(self
, False), info_dict
))
3368 info_dict
['__write_download_archive'] = self
.params
.get('force_write_download_archive')
3371 info_dict
.setdefault('__postprocessors', [])
3374 def existing_video_file(*filepaths
):
3375 ext
= info_dict
.get('ext')
3376 converted
= lambda file: replace_extension(file, self
.params
.get('final_ext') or ext
, ext
)
3377 file = self
.existing_file(itertools
.chain(*zip(map(converted
, filepaths
), filepaths
)),
3378 default_overwrite
=False)
3380 info_dict
['ext'] = os
.path
.splitext(file)[1][1:]
3383 fd
, success
= None, True
3384 if info_dict
.get('protocol') or info_dict
.get('url'):
3385 fd
= get_suitable_downloader(info_dict
, self
.params
, to_stdout
=temp_filename
== '-')
3386 if fd
!= FFmpegFD
and 'no-direct-merge' not in self
.params
['compat_opts'] and (
3387 info_dict
.get('section_start') or info_dict
.get('section_end')):
3388 msg
= ('This format cannot be partially downloaded' if FFmpegFD
.available()
3389 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3390 self
.report_error(f
'{msg}. Aborting')
3393 if info_dict
.get('requested_formats') is not None:
3394 old_ext
= info_dict
['ext']
3395 if self
.params
.get('merge_output_format') is None:
3396 if (info_dict
['ext'] == 'webm'
3397 and info_dict
.get('thumbnails')
3398 # check with type instead of pp_key, __name__, or isinstance
3399 # since we dont want any custom PPs to trigger this
3400 and any(type(pp
) == EmbedThumbnailPP
for pp
in self
._pps
['post_process'])): # noqa: E721
3401 info_dict
['ext'] = 'mkv'
3402 self
.report_warning(
3403 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3404 new_ext
= info_dict
['ext']
3406 def correct_ext(filename
, ext
=new_ext
):
3409 filename_real_ext
= os
.path
.splitext(filename
)[1][1:]
3411 os
.path
.splitext(filename
)[0]
3412 if filename_real_ext
in (old_ext
, new_ext
)
3414 return f
'{filename_wo_ext}.{ext}'
3416 # Ensure filename always has a correct extension for successful merge
3417 full_filename
= correct_ext(full_filename
)
3418 temp_filename
= correct_ext(temp_filename
)
3419 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3421 info_dict
['__real_download'] = False
3422 # NOTE: Copy so that original format dicts are not modified
3423 info_dict
['requested_formats'] = list(map(dict, info_dict
['requested_formats']))
3425 merger
= FFmpegMergerPP(self
)
3427 if dl_filename
is not None:
3428 self
.report_file_already_downloaded(dl_filename
)
3430 for f
in info_dict
['requested_formats'] if fd
!= FFmpegFD
else []:
3431 f
['filepath'] = fname
= prepend_extension(
3432 correct_ext(temp_filename
, info_dict
['ext']),
3433 'f{}'.format(f
['format_id']), info_dict
['ext'])
3434 downloaded
.append(fname
)
3435 info_dict
['url'] = '\n'.join(f
['url'] for f
in info_dict
['requested_formats'])
3436 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3437 info_dict
['__real_download'] = real_download
3439 if self
.params
.get('allow_unplayable_formats'):
3440 self
.report_warning(
3441 'You have requested merging of multiple formats '
3442 'while also allowing unplayable formats to be downloaded. '
3443 'The formats won\'t be merged to prevent data corruption.')
3444 elif not merger
.available
:
3445 msg
= 'You have requested merging of multiple formats but ffmpeg is not installed'
3446 if not self
.params
.get('ignoreerrors'):
3447 self
.report_error(f
'{msg}. Aborting due to --abort-on-error')
3449 self
.report_warning(f
'{msg}. The formats won\'t be merged')
3451 if temp_filename
== '-':
3452 reason
= ('using a downloader other than ffmpeg' if FFmpegFD
.can_merge_formats(info_dict
, self
.params
)
3453 else 'but the formats are incompatible for simultaneous download' if merger
.available
3454 else 'but ffmpeg is not installed')
3455 self
.report_warning(
3456 f
'You have requested downloading multiple formats to stdout {reason}. '
3457 'The formats will be streamed one after the other')
3458 fname
= temp_filename
3459 for f
in info_dict
['requested_formats']:
3460 new_info
= dict(info_dict
)
3461 del new_info
['requested_formats']
3463 if temp_filename
!= '-':
3464 fname
= prepend_extension(
3465 correct_ext(temp_filename
, new_info
['ext']),
3466 'f{}'.format(f
['format_id']), new_info
['ext'])
3467 if not self
._ensure
_dir
_exists
(fname
):
3469 f
['filepath'] = fname
3470 downloaded
.append(fname
)
3471 partial_success
, real_download
= self
.dl(fname
, new_info
)
3472 info_dict
['__real_download'] = info_dict
['__real_download'] or real_download
3473 success
= success
and partial_success
3475 if downloaded
and merger
.available
and not self
.params
.get('allow_unplayable_formats'):
3476 info_dict
['__postprocessors'].append(merger
)
3477 info_dict
['__files_to_merge'] = downloaded
3478 # Even if there were no downloads, it is being merged only now
3479 info_dict
['__real_download'] = True
3481 for file in downloaded
:
3482 files_to_move
[file] = None
3484 # Just a single file
3485 dl_filename
= existing_video_file(full_filename
, temp_filename
)
3486 if dl_filename
is None or dl_filename
== temp_filename
:
3487 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3488 # So we should try to resume the download
3489 success
, real_download
= self
.dl(temp_filename
, info_dict
)
3490 info_dict
['__real_download'] = real_download
3492 self
.report_file_already_downloaded(dl_filename
)
3494 dl_filename
= dl_filename
or temp_filename
3495 info_dict
['__finaldir'] = os
.path
.dirname(os
.path
.abspath(full_filename
))
3497 except network_exceptions
as err
:
3498 self
.report_error(f
'unable to download video data: {err}')
3500 except OSError as err
:
3501 raise UnavailableVideoError(err
)
3502 except ContentTooShortError
as err
:
3503 self
.report_error(f
'content too short (expected {err.expected} bytes and served {err.downloaded})')
3506 self
._raise
_pending
_errors
(info_dict
)
3507 if success
and full_filename
!= '-':
3511 fixup_policy
= self
.params
.get('fixup')
3512 vid
= info_dict
['id']
3514 if fixup_policy
in ('ignore', 'never'):
3516 elif fixup_policy
== 'warn':
3518 elif fixup_policy
!= 'force':
3519 assert fixup_policy
in ('detect_or_warn', None)
3520 if not info_dict
.get('__real_download'):
3523 def ffmpeg_fixup(cndn
, msg
, cls
):
3524 if not (do_fixup
and cndn
):
3526 elif do_fixup
== 'warn':
3527 self
.report_warning(f
'{vid}: {msg}')
3531 info_dict
['__postprocessors'].append(pp
)
3533 self
.report_warning(f
'{vid}: {msg}. Install ffmpeg to fix this automatically')
3535 stretched_ratio
= info_dict
.get('stretched_ratio')
3536 ffmpeg_fixup(stretched_ratio
not in (1, None),
3537 f
'Non-uniform pixel ratio {stretched_ratio}',
3538 FFmpegFixupStretchedPP
)
3540 downloader
= get_suitable_downloader(info_dict
, self
.params
) if 'protocol' in info_dict
else None
3541 downloader
= downloader
.FD_NAME
if downloader
else None
3543 ext
= info_dict
.get('ext')
3544 postprocessed_by_ffmpeg
= info_dict
.get('requested_formats') or any((
3545 isinstance(pp
, FFmpegVideoConvertorPP
)
3546 and resolve_recode_mapping(ext
, pp
.mapping
)[0] not in (ext
, None)
3547 ) for pp
in self
._pps
['post_process'])
3549 if not postprocessed_by_ffmpeg
:
3550 ffmpeg_fixup(fd
!= FFmpegFD
and ext
== 'm4a'
3551 and info_dict
.get('container') == 'm4a_dash',
3552 'writing DASH m4a. Only some players support this container',
3554 ffmpeg_fixup((downloader
== 'hlsnative' and not self
.params
.get('hls_use_mpegts'))
3555 or (info_dict
.get('is_live') and self
.params
.get('hls_use_mpegts') is None),
3556 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3558 ffmpeg_fixup(downloader
== 'dashsegments'
3559 and (info_dict
.get('is_live') or info_dict
.get('is_dash_periods')),
3560 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP
)
3562 ffmpeg_fixup(downloader
== 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP
)
3563 ffmpeg_fixup(downloader
== 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP
)
3567 replace_info_dict(self
.post_process(dl_filename
, info_dict
, files_to_move
))
3568 except PostProcessingError
as err
:
3569 self
.report_error(f
'Postprocessing: {err}')
3572 for ph
in self
._post
_hooks
:
3573 ph(info_dict
['filepath'])
3574 except Exception as err
:
3575 self
.report_error(f
'post hooks: {err}')
3577 info_dict
['__write_download_archive'] = True
3579 assert info_dict
is original_infodict
# Make sure the info_dict was modified in-place
3580 if self
.params
.get('force_write_download_archive'):
3581 info_dict
['__write_download_archive'] = True
3582 check_max_downloads()
3584 def __download_wrapper(self
, func
):
3585 @functools.wraps(func
)
3586 def wrapper(*args
, **kwargs
):
3588 res
= func(*args
, **kwargs
)
3589 except CookieLoadError
:
3591 except UnavailableVideoError
as e
:
3592 self
.report_error(e
)
3593 except DownloadCancelled
as e
:
3594 self
.to_screen(f
'[info] {e}')
3595 if not self
.params
.get('break_per_url'):
3597 self
._num
_downloads
= 0
3599 if self
.params
.get('dump_single_json', False):
3600 self
.post_extract(res
)
3601 self
.to_stdout(json
.dumps(self
.sanitize_info(res
)))
def download(self, url_list):
    """Download a given list of URLs."""
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']

    # Several URLs written through a template without any field would all
    # land in the same file (stdout and max_downloads == 1 are exempt)
    is_fixed_name = outtmpl != '-' and '%' not in outtmpl
    if len(url_list) > 1 and is_fixed_name and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
def download_with_info_file(self, info_filename):
    """Download from the info dict(s) stored as JSON in *info_filename*.

    If processing a stored info dict fails, the download is retried from
    its 'webpage_url'; the error is re-raised when there is no such URL.
    Returns the accumulated download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as info_file:
        # FileInput doesn't have a read method, we can't call json.load
        parsed = json.loads('\n'.join(info_file))
        infos = [
            self.sanitize_info(entry, self.params.get('clean_infojson', True))
            for entry in variadic(parsed)]

    for info in infos:
        try:
            self.__download_wrapper(self.process_ie_result)(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
            if not isinstance(e, EntryNotInPlaylist):
                self.to_stderr('\r')
            webpage_url = info.get('webpage_url')
            if webpage_url is None:
                raise
            self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
            self.download([webpage_url])
        except ExtractorError as e:
            self.report_error(e)
    return self._download_retcode
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """ Sanitize the infodict for converting to json """
    if info_dict is None:
        return info_dict

    # NB: These defaults are written into the passed dict itself
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')
    info_dict.setdefault('_version', {
        'version': __version__,
        'current_git_head': current_git_head(),
        'release_git_head': RELEASE_GIT_HEAD,
        'repository': ORIGIN,
    })

    private_keys = frozenset({
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
        'playlist_autonumber',
    })

    def is_rejected(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in private_keys

    def to_jsonable(obj):
        # Containers are rebuilt recursively; anything that is not a plain
        # scalar is replaced by its repr
        if isinstance(obj, dict):
            return {k: to_jsonable(v) for k, v in obj.items() if not is_rejected(k, v)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [to_jsonable(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return to_jsonable(info_dict)
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """ Alias of sanitize_info for backward compatibility """
    # 'actually_filter' is the old name of 'remove_private_keys'
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3682 def _delete_downloaded_files(self
, *files_to_delete
, info
={}, msg
=None):
3683 for filename
in set(filter(None, files_to_delete
)):
3685 self
.to_screen(msg
% filename
)
3689 self
.report_warning(f
'Unable to delete file {filename}')
3690 if filename
in info
.get('__files_to_move', []): # NB: Delete even if None
3691 del info
['__files_to_move'][filename
]
3694 def post_extract(info_dict
):
3695 def actual_post_extract(info_dict
):
3696 if info_dict
.get('_type') in ('playlist', 'multi_video'):
3697 for video_dict
in info_dict
.get('entries', {}):
3698 actual_post_extract(video_dict
or {})
3701 post_extractor
= info_dict
.pop('__post_extractor', None) or dict
3702 info_dict
.update(post_extractor())
3704 actual_post_extract(info_dict
or {})
def run_pp(self, pp, infodict):
    """Run the postprocessor *pp* on *infodict* and return the resulting dict.

    Files that the postprocessor marks for deletion are either removed or,
    with the 'keepvideo' param, registered in '__files_to_move'.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if obsolete_files:
        if self.params.get('keepvideo', False):
            for path in obsolete_files:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *obsolete_files, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered under *key* and return the final info.

    @param additional_pps  Extra postprocessors that run before the
                           registered ones
    """
    if key != 'video':
        self._forceprint(key, info)
    pending = (additional_pps or []) + self._pps[key]
    for pp in pending:
        info = self.run_pp(pp, info)
    return info
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors of stage *key* on a shallow copy of *ie_info*.

    Returns a tuple (info, files_to_move). A PostProcessingError is not
    raised; it is stored under '__pending_error' (unless one is already
    set) and reported.
    """
    info = {**ie_info, '__files_to_move': files_to_move or {}}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        msg = f'Preprocessing: {err}'
        info.setdefault('__pending_error', msg)
        self.report_error(msg, is_error=False)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })
    # Postprocessors attached to this download run before the registered ones
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
3756 def _make_archive_id(self
, info_dict
):
3757 video_id
= info_dict
.get('id')
3760 # Future-proof against any change in case
3761 # and backwards compatibility with prior versions
3762 extractor
= info_dict
.get('extractor_key') or info_dict
.get('ie_key') # key in a playlist
3763 if extractor
is None:
3764 url
= str_or_none(info_dict
.get('url'))
3767 # Try to find matching extractor for the URL and take its ie_key
3768 for ie_key
, ie
in self
._ies
.items():
3769 if ie
.suitable(url
):
3774 return make_archive_id(extractor
, video_id
)
def in_download_archive(self, info_dict):
    """Whether the current or any old archive id of *info_dict* is in the archive"""
    if not self.archive:
        return False

    candidates = [
        self._make_archive_id(info_dict),
        *(info_dict.get('_old_archive_ids') or []),
    ]
    for archive_id in candidates:
        if archive_id in self.archive:
            return True
    return False
def record_download_archive(self, info_dict):
    """Add the archive id of *info_dict* to the download archive, if one is configured"""
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    # Only path-like archives are appended to on disk;
    # the in-memory archive is updated in every case
    if is_path_like(archive_path):
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3798 def format_resolution(format
, default
='unknown'):
3799 if format
.get('vcodec') == 'none' and format
.get('acodec') != 'none':
3801 if format
.get('resolution') is not None:
3802 return format
['resolution']
3803 if format
.get('width') and format
.get('height'):
3804 return '%dx%d' % (format
['width'], format
['height'])
3805 elif format
.get('height'):
3806 return '{}p'.format(format
['height'])
3807 elif format
.get('width'):
3808 return '%dx?' % format
['width']
3811 def _list_format_headers(self
, *headers
):
3812 if self
.params
.get('listformats_table', True) is not False:
3813 return [self
._format
_out
(header
, self
.Styles
.HEADERS
) for header
in headers
]
3816 def _format_note(self
, fdict
):
3818 if fdict
.get('ext') in ['f4f', 'f4m']:
3819 res
+= '(unsupported)'
3820 if fdict
.get('language'):
3823 res
+= '[{}]'.format(fdict
['language'])
3824 if fdict
.get('format_note') is not None:
3827 res
+= fdict
['format_note']
3828 if fdict
.get('tbr') is not None:
3831 res
+= '%4dk' % fdict
['tbr']
3832 if fdict
.get('container') is not None:
3835 res
+= '{} container'.format(fdict
['container'])
3836 if (fdict
.get('vcodec') is not None
3837 and fdict
.get('vcodec') != 'none'):
3840 res
+= fdict
['vcodec']
3841 if fdict
.get('vbr') is not None:
3843 elif fdict
.get('vbr') is not None and fdict
.get('abr') is not None:
3845 if fdict
.get('vbr') is not None:
3846 res
+= '%4dk' % fdict
['vbr']
3847 if fdict
.get('fps') is not None:
3850 res
+= '{}fps'.format(fdict
['fps'])
3851 if fdict
.get('acodec') is not None:
3854 if fdict
['acodec'] == 'none':
3857 res
+= '%-5s' % fdict
['acodec']
3858 elif fdict
.get('abr') is not None:
3862 if fdict
.get('abr') is not None:
3863 res
+= '@%3dk' % fdict
['abr']
3864 if fdict
.get('asr') is not None:
3865 res
+= ' (%5dHz)' % fdict
['asr']
3866 if fdict
.get('filesize') is not None:
3869 res
+= format_bytes(fdict
['filesize'])
3870 elif fdict
.get('filesize_approx') is not None:
3873 res
+= '~' + format_bytes(fdict
['filesize_approx'])
3876 def _get_formats(self
, info_dict
):
3877 if info_dict
.get('formats') is None:
3878 if info_dict
.get('url') and info_dict
.get('_type', 'video') == 'video':
3881 return info_dict
['formats']
def render_formats_table(self, info_dict):
    """Render the available formats of *info_dict* as a table.

    Returns None when there are no formats. When the 'listformats_table'
    param is False the old four-column layout is used. Formats with a
    preference below -1000 are left out of both layouts.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        legacy_rows = [
            [
                format_field(fmt, 'format_id'),
                format_field(fmt, 'ext'),
                self.format_resolution(fmt),
                self._format_note(fmt),
            ] for fmt in formats if (fmt.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        if codec != 'none':
            return '.'.join(codec.split('.')[:4])

        # From here on this codec is 'none'; the label depends on the other one
        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        if field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def build_row(f):
        filesize = (
            format_field(f, 'filesize', ' \t%s', func=format_bytes)
            or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
            or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
                            self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes))
        return [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            filesize,
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                format_field(f, 'format_note'),
                format_field(f, 'container', ignore=(None, f.get('ext'))),
                delim=', '), delim=' '),
        ]

    table = [
        build_row(f) for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
def render_thumbnails_table(self, info_dict):
    """Render the thumbnails of *info_dict* as a table, or return None if there are none"""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ])
    return render_table(self._list_format_headers('ID', 'Width', 'Height', 'URL'), rows)
def render_subtitles_table(self, video_id, subtitles):
    """Render a {language: [formats]} mapping as a table, or return None if it is empty"""
    def _row(lang, formats):
        # Formats are listed in reverse order
        pairs = [(f['ext'], f.get('name') or 'unknown') for f in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # A name shared by all formats is shown once; 'unknown' is hidden
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(headers, rows, hide_empty=True)
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by func(*args), or a notice that *video_id* has no *name*"""
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')
def list_formats(self, info_dict):
    """Print the formats table of *info_dict*"""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
def list_thumbnails(self, info_dict):
    """Print the thumbnails table of *info_dict*"""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for *video_id*; *name* is the label used in the messages"""
    render_args = (video_id, subtitles)
    self.__list_table(video_id, name, self.render_subtitles_table, *render_args)
def print_debug_header(self):
    """Write the verbose debug header; does nothing unless the 'verbose' param is set"""
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides,
    )

    def get_encoding(stream):
        ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
        additional_info = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            additional_info.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if additional_info:
            ret = f'{ret} ({",".join(additional_info)})'
        return ret

    stream_encodings = ', '.join(
        f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
        if stream is not None and key != 'console')
    encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        stream_encodings,
    )

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line itself is written with encoding=None
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    write_debug(join_nonempty(
        f'{REPOSITORY.rpartition("/")[2]} version',
        _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features)))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug(f'exe versions: {exe_str}')

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
    if os.environ.get('YTDLP_NO_PLUGINS'):
        write_debug('Plugins are forcibly disabled')
        return

    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        display_list = ['{}{}'.format(
            klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in plugins.items()]
        if plugin_type == 'Extractor':
            display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, plugins in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')
@functools.cached_property
def proxies(self):
    """Global proxy configuration"""
    opts_proxy = self.params.get('proxy')
    if opts_proxy is not None:
        # An empty 'proxy' param means that no proxy should be used
        return {'all': '__noproxy__' if opts_proxy == '' else opts_proxy}

    proxy_map = urllib.request.getproxies()
    # compat. Set HTTPS_PROXY to __noproxy__ to revert
    if 'http' in proxy_map and 'https' not in proxy_map:
        proxy_map['https'] = proxy_map['http']
    return proxy_map
@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance"""
    cookie_file = self.params.get('cookiefile')
    browser_spec = self.params.get('cookiesfrombrowser')
    try:
        return load_cookies(cookie_file, browser_spec, self)
    except CookieLoadError as error:
        # Report the underlying error, then let the CookieLoadError propagate
        cause = error.__context__
        # compat: <=py3.9: `traceback.format_exception` has a different signature
        formatted_tb = ''.join(traceback.format_exception(None, cause, cause.__traceback__))
        self.report_error(str(cause), tb=formatted_tb)
        raise
@property
def _opener(self):
    """
    Get a urllib OpenerDirector from the Urllib handler (deprecated).
    """
    self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
    urllib_handler = self._request_director.handlers['Urllib']
    return urllib_handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
def _get_available_impersonate_targets(self):
    """Return a list of (target, handler name) for every impersonation-capable request handler"""
    # TODO(future): make available as public API
    available = []
    for rh in self._request_director.handlers.values():
        if not isinstance(rh, ImpersonateRequestHandler):
            continue
        available.extend((target, rh.RH_NAME) for target in rh.supported_targets)
    return available
def _impersonate_target_available(self, target):
    """Whether any impersonation-capable request handler supports *target*"""
    # TODO(future): make available as public API
    for rh in self._request_director.handlers.values():
        if isinstance(rh, ImpersonateRequestHandler) and rh.is_supported_target(target):
            return True
    return False
def urlopen(self, req):
    """ Start an HTTP download """
    if isinstance(req, str):
        req = Request(req)
    elif isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    url, basic_auth_header = extract_basic_auth(req.url)
    if basic_auth_header:
        req.headers['Authorization'] = basic_auth_header
    req.url = sanitize_url(url)

    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        # Replace well-known "unsupported" errors with an actionable message
        for ue in e.unsupported_errors:
            # FIXME: This depends on the order of errors.
            if not (ue.handler and ue.msg):
                continue
            message = ue.msg.lower()
            if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in message:
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue

            if (
                'unsupported proxy type: "https"' in message
                and 'requests' not in self._request_director.handlers
                and 'curl_cffi' not in self._request_director.handlers
            ):
                raise RequestError(
                    'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')

            if (
                re.match(r'unsupported url scheme: "wss?"', message)
                and 'websockets' not in self._request_director.handlers
            ):
                raise RequestError(
                    'This request requires WebSocket support. '
                    'Ensure one of the following dependencies are installed: websockets',
                    cause=ue) from ue

            if re.match(r'unsupported (?:extensions: impersonate|impersonate target)', message):
                raise RequestError(
                    f'Impersonate target "{req.extensions["impersonate"]}" is not available.'
                    f' See --list-impersonate-targets for available targets.'
                    f' This request requires browser impersonation, however you may be missing dependencies'
                    f' required to support this target.')
        raise
    except SSLError as e:
        ssl_message = str(e)
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in ssl_message:
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        if 'SSLV3_ALERT_HANDSHAKE_FAILURE' in ssl_message:
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise
def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector with an instance of every class in *handlers*.

    Each handler is configured from self.params and shares the cleaned
    global headers and proxies as well as the global cookiejar.
    *preferences* are added to the preferences of the director.
    """
    logger = _YDLLogger(self)
    headers = self.params['http_headers'].copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler_cls in handlers:
        # Handler option name -> name of the param it is read from
        param_options = traverse_obj(self.params, {
            'verbose': 'debug_printtraffic',
            'source_address': 'source_address',
            'timeout': 'socket_timeout',
            'legacy_ssl_support': 'legacyserverconnect',
            'enable_file_urls': 'enable_file_urls',
            'impersonate': 'impersonate',
            'client_cert': {
                'client_certificate': 'client_certificate',
                'client_certificate_key': 'client_certificate_key',
                'client_certificate_password': 'client_certificate_password',
            },
        })
        director.add_handler(handler_cls(
            logger=logger,
            headers=headers,
            cookiejar=self.cookiejar,
            proxies=proxies,
            prefer_system_certs='no-certifi' in self.params['compat_opts'],
            verify=not self.params.get('nocheckcertificate'),
            **param_options,
        ))
    director.preferences.update(preferences or [])
    if 'prefer-legacy-http-handler' in self.params['compat_opts']:
        director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
    return director
@functools.cached_property
def _request_director(self):
    """Request director built from the registered request handlers and preferences"""
    handler_classes = _REQUEST_HANDLERS.values()
    return self.build_request_director(handler_classes, _RH_PREFERENCES)
def encode(self, s):
    """Encode *s* with the configured encoding; bytes are returned unchanged"""
    if isinstance(s, bytes):
        return s  # Already encoded

    target_encoding = self.get_encoding()
    try:
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        # Add a hint for the user before propagating the error
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
def get_encoding(self):
    """Return the 'encoding' param, falling back to preferredencoding() when it is None"""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    """
    Write the info dict of `ie_result` as JSON to `infofn`

    `overwrite` defaults to the 'overwrites' param (True when unset).
    Returns True = written, 'exists' = Already exists, False = skip, None = error
    """
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None
    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return 'exists'

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        sanitized = self.sanitize_info(ie_result, self.params.get('clean_infojson', True))
        write_json_file(sanitized, infofn)
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True
def _write_description(self, label, ie_result, descfn):
    """
    Write the 'description' of `ie_result` to the text file `descfn` (UTF-8)

    Returns True = written (or file kept because it already exists),
    False = skip, None = error
    """
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None
    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        # An existing file is left untouched but still reported as success
        self.to_screen(f'[info] {label.title()} description is already present')
        return True

    description = ie_result.get('description')
    if description is None:
        self.to_screen(f'[info] There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with open(descfn, 'w', encoding='utf-8') as out:
            out.write(description)
    except OSError:
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True
def _write_subtitles(self, info_dict, filename):
    """
    Write the requested subtitles of `info_dict` next to `filename`

    Subtitles carrying inline 'data' are written directly; the others are
    fetched through self.dl(). Every processed subtitle gets its 'filepath' set.
    Returns a list of (sub_filename, final_sub_filename); or None if error
    """
    written = []
    requested = info_dict.get('requested_subtitles')
    if not self.params.get('writesubtitles') and not self.params.get('writeautomaticsub'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return written
    if not requested:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return written

    final_base = self.prepare_filename(info_dict, 'subtitle')
    if not final_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return written

    for sub_lang, sub_info in requested.items():
        sub_format = sub_info['ext']
        video_ext = info_dict.get('ext')
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, video_ext)
        sub_filename_final = subtitles_filename(final_base, sub_lang, sub_format, info_dict.get('ext'))

        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            written.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                written.append((sub_filename, sub_filename_final))
                continue
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        try:
            # Download with a copy so the fallback headers are not stored on sub_info
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            written.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            if self.params.get('ignoreerrors') is True:
                self.report_warning(msg)
                continue
            # False or 'only_download': abort, reporting the error only when unset/falsy
            if not self.params.get('ignoreerrors'):
                self.report_error(msg)
            raise DownloadError(msg)
    return written
4373 def _write_thumbnails(self
, label
, info_dict
, filename
, thumb_filename_base
=None):
4374 """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """
4375 write_all
= self
.params
.get('write_all_thumbnails', False)
4376 thumbnails
, ret
= [], []
4377 if write_all
or self
.params
.get('writethumbnail', False):
4378 thumbnails
= info_dict
.get('thumbnails') or []
4380 self
.to_screen(f
'[info] There are no {label} thumbnails to download')
4382 multiple
= write_all
and len(thumbnails
) > 1
4384 if thumb_filename_base
is None:
4385 thumb_filename_base
= filename
4386 if thumbnails
and not thumb_filename_base
:
4387 self
.write_debug(f
'Skipping writing {label} thumbnail')
4390 if thumbnails
and not self
._ensure
_dir
_exists
(filename
):
4393 for idx
, t
in list(enumerate(thumbnails
))[::-1]:
4394 thumb_ext
= t
.get('ext') or determine_ext(t
['url'], 'jpg')
4396 thumb_ext
= f
'{t["id"]}.{thumb_ext}'
4397 thumb_display_id
= f
'{label} thumbnail {t["id"]}'
4398 thumb_filename
= replace_extension(filename
, thumb_ext
, info_dict
.get('ext'))
4399 thumb_filename_final
= replace_extension(thumb_filename_base
, thumb_ext
, info_dict
.get('ext'))
4401 existing_thumb
= self
.existing_file((thumb_filename_final
, thumb_filename
))
4403 self
.to_screen('[info] {} is already present'.format((
4404 thumb_display_id
if multiple
else f
'{label} thumbnail').capitalize()))
4405 t
['filepath'] = existing_thumb
4406 ret
.append((existing_thumb
, thumb_filename_final
))
4408 self
.to_screen(f
'[info] Downloading {thumb_display_id} ...')
4410 uf
= self
.urlopen(Request(t
['url'], headers
=t
.get('http_headers', {})))
4411 self
.to_screen(f
'[info] Writing {thumb_display_id} to: {thumb_filename}')
4412 with
open(thumb_filename
, 'wb') as thumbf
:
4413 shutil
.copyfileobj(uf
, thumbf
)
4414 ret
.append((thumb_filename
, thumb_filename_final
))
4415 t
['filepath'] = thumb_filename
4416 except network_exceptions
as err
:
4417 if isinstance(err
, HTTPError
) and err
.status
== 404:
4418 self
.to_screen(f
'[info] {thumb_display_id.title()} does not exist')
4420 self
.report_warning(f
'Unable to download {thumb_display_id}: {err}')
4422 if ret
and not write_all
: