10 from .fragment
import FragmentFD
11 from ..compat
import functools
12 from ..postprocessor
.ffmpeg
import EXT_TO_OUT_FORMATS
, FFmpegPostProcessor
class Features(enum.Enum):
    """Optional capabilities a downloader class may list in SUPPORTED_FEATURES."""
    # Checked by supports() when info_dict has a truthy 'to_stdout'
    TO_STDOUT = enum.auto()
    # Checked by supports() when info_dict['protocol'] contains '+'
    MULTIPLE_FORMATS = enum.auto()
37 class ExternalFD(FragmentFD
):
38 SUPPORTED_PROTOCOLS
= ('http', 'https', 'ftp', 'ftps')
39 SUPPORTED_FEATURES
= ()
40 _CAPTURE_STDERR
= True
42 def real_download(self
, filename
, info_dict
):
43 self
.report_destination(filename
)
44 tmpfilename
= self
.temp_name(filename
)
48 retval
= self
._call
_downloader
(tmpfilename
, info_dict
)
49 except KeyboardInterrupt:
50 if not info_dict
.get('is_live'):
52 # Live stream downloading cancellation should be considered as
53 # correct and expected termination thus all postprocessing
56 self
.to_screen('[%s] Interrupted by user' % self
.get_basename())
62 'elapsed': time
.time() - started
,
65 fsize
= os
.path
.getsize(encodeFilename(tmpfilename
))
66 self
.try_rename(tmpfilename
, filename
)
68 'downloaded_bytes': fsize
,
71 self
._hook
_progress
(status
, info_dict
)
75 self
.report_error('%s exited with code %d' % (
76 self
.get_basename(), retval
))
def get_basename(cls):
    """Return the class name, minus its last two characters, in lower case.

    For the downloader classes in this file this drops the ``FD`` suffix,
    e.g. ``CurlFD`` -> ``curl``.
    """
    class_name = cls.__name__
    without_suffix = class_name[:-2]
    return without_suffix.lower()
85 return cls
.get_basename()
87 @functools.cached_property
92 def available(cls
, path
=None):
93 path
= check_executable(
94 cls
.EXE_NAME
if path
in (None, cls
.get_basename()) else path
,
102 def supports(cls
, info_dict
):
104 not info_dict
.get('to_stdout') or Features
.TO_STDOUT
in cls
.SUPPORTED_FEATURES
,
105 '+' not in info_dict
['protocol'] or Features
.MULTIPLE_FORMATS
in cls
.SUPPORTED_FEATURES
,
106 not traverse_obj(info_dict
, ('hls_aes', ...), 'extra_param_to_segment_url'),
107 all(proto
in cls
.SUPPORTED_PROTOCOLS
for proto
in info_dict
['protocol'].split('+')),
def can_download(cls, info_dict, path=None):
    """Tell whether this downloader can be used for *info_dict*.

    If ``cls.available(path)`` is falsy, that falsy value is returned as-is
    and ``cls.supports`` is never called; otherwise the result of
    ``cls.supports(info_dict)`` is returned.
    """
    availability = cls.available(path)
    if not availability:
        return availability
    return cls.supports(info_dict)
def _option(self, command_option, param):
    """Delegate to ``cli_option`` using this downloader's ``params``."""
    downloader_params = self.params
    return cli_option(downloader_params, command_option, param)
def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
    """Delegate to ``cli_bool_option`` using this downloader's ``params``.

    All arguments are forwarded positionally, in the order received.
    """
    forwarded = (command_option, param, true_value, false_value, separator)
    return cli_bool_option(self.params, *forwarded)
def _valueless_option(self, command_option, param, expected_value=True):
    """Delegate to ``cli_valueless_option`` using this downloader's ``params``."""
    forwarded = (command_option, param, expected_value)
    return cli_valueless_option(self.params, *forwarded)
def _configuration_args(self, keys=None, *args, **kwargs):
    """Call the module-level ``_configuration_args`` for this downloader.

    Passes the downloader's basename, the ``external_downloader_args``
    entry of ``self.params`` and ``EXE_NAME``, followed by *keys* and any
    extra positional/keyword arguments unchanged.
    """
    basename = self.get_basename()
    downloader_args = self.params.get('external_downloader_args')
    exe_name = self.EXE_NAME
    return _configuration_args(basename, downloader_args, exe_name, keys, *args, **kwargs)
128 def _call_downloader(self
, tmpfilename
, info_dict
):
129 """ Either overwrite this or implement _make_cmd """
130 cmd
= [encodeArgument(a
) for a
in self
._make
_cmd
(tmpfilename
, info_dict
)]
134 if 'fragments' not in info_dict
:
135 _
, stderr
, returncode
= self
._call
_process
(cmd
, info_dict
)
136 if returncode
and stderr
:
137 self
.to_stderr(stderr
)
140 skip_unavailable_fragments
= self
.params
.get('skip_unavailable_fragments', True)
142 retry_manager
= RetryManager(self
.params
.get('fragment_retries'), self
.report_retry
,
143 frag_index
=None, fatal
=not skip_unavailable_fragments
)
144 for retry
in retry_manager
:
145 _
, stderr
, returncode
= self
._call
_process
(cmd
, info_dict
)
148 # TODO: Decide whether to retry based on error code
149 # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
151 self
.to_stderr(stderr
)
152 retry
.error
= Exception()
154 if not skip_unavailable_fragments
and retry_manager
.error
:
157 decrypt_fragment
= self
.decrypter(info_dict
)
158 dest
, _
= self
.sanitize_open(tmpfilename
, 'wb')
159 for frag_index
, fragment
in enumerate(info_dict
['fragments']):
160 fragment_filename
= '%s-Frag%d' % (tmpfilename
, frag_index
)
162 src
, _
= self
.sanitize_open(fragment_filename
, 'rb')
163 except OSError as err
:
164 if skip_unavailable_fragments
and frag_index
> 1:
165 self
.report_skip_fragment(frag_index
, err
)
167 self
.report_error(f
'Unable to open fragment {frag_index}; {err}')
169 dest
.write(decrypt_fragment(fragment
, src
.read()))
171 if not self
.params
.get('keep_fragments', False):
172 self
.try_remove(encodeFilename(fragment_filename
))
174 self
.try_remove(encodeFilename('%s.frag.urls' % tmpfilename
))
def _call_process(self, cmd, info_dict):
    """Run *cmd* through ``Popen.run`` in text mode and return its result.

    stderr is piped only when the class sets ``_CAPTURE_STDERR``; otherwise
    ``None`` is passed for it. *info_dict* is not used here.
    """
    if self._CAPTURE_STDERR:
        stderr_target = subprocess.PIPE
    else:
        stderr_target = None
    return Popen.run(cmd, text=True, stderr=stderr_target)
181 class CurlFD(ExternalFD
):
183 _CAPTURE_STDERR
= False # curl writes the progress to stderr
185 def _make_cmd(self
, tmpfilename
, info_dict
):
186 cmd
= [self
.exe
, '--location', '-o', tmpfilename
, '--compressed']
187 if info_dict
.get('http_headers') is not None:
188 for key
, val
in info_dict
['http_headers'].items():
189 cmd
+= ['--header', f
'{key}: {val}']
191 cmd
+= self
._bool
_option
('--continue-at', 'continuedl', '-', '0')
192 cmd
+= self
._valueless
_option
('--silent', 'noprogress')
193 cmd
+= self
._valueless
_option
('--verbose', 'verbose')
194 cmd
+= self
._option
('--limit-rate', 'ratelimit')
195 retry
= self
._option
('--retry', 'retries')
197 if retry
[1] in ('inf', 'infinite'):
198 retry
[1] = '2147483647'
200 cmd
+= self
._option
('--max-filesize', 'max_filesize')
201 cmd
+= self
._option
('--interface', 'source_address')
202 cmd
+= self
._option
('--proxy', 'proxy')
203 cmd
+= self
._valueless
_option
('--insecure', 'nocheckcertificate')
204 cmd
+= self
._configuration
_args
()
205 cmd
+= ['--', info_dict
['url']]
209 class AxelFD(ExternalFD
):
212 def _make_cmd(self
, tmpfilename
, info_dict
):
213 cmd
= [self
.exe
, '-o', tmpfilename
]
214 if info_dict
.get('http_headers') is not None:
215 for key
, val
in info_dict
['http_headers'].items():
216 cmd
+= ['-H', f
'{key}: {val}']
217 cmd
+= self
._configuration
_args
()
218 cmd
+= ['--', info_dict
['url']]
222 class WgetFD(ExternalFD
):
223 AVAILABLE_OPT
= '--version'
225 def _make_cmd(self
, tmpfilename
, info_dict
):
226 cmd
= [self
.exe
, '-O', tmpfilename
, '-nv', '--no-cookies', '--compression=auto']
227 if info_dict
.get('http_headers') is not None:
228 for key
, val
in info_dict
['http_headers'].items():
229 cmd
+= ['--header', f
'{key}: {val}']
230 cmd
+= self
._option
('--limit-rate', 'ratelimit')
231 retry
= self
._option
('--tries', 'retries')
233 if retry
[1] in ('inf', 'infinite'):
236 cmd
+= self
._option
('--bind-address', 'source_address')
237 proxy
= self
.params
.get('proxy')
239 for var
in ('http_proxy', 'https_proxy'):
240 cmd
+= ['--execute', f
'{var}={proxy}']
241 cmd
+= self
._valueless
_option
('--no-check-certificate', 'nocheckcertificate')
242 cmd
+= self
._configuration
_args
()
243 cmd
+= ['--', info_dict
['url']]
247 class Aria2cFD(ExternalFD
):
249 SUPPORTED_PROTOCOLS
= ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
252 def supports_manifest(manifest
):
253 UNSUPPORTED_FEATURES
= [
254 r
'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1]
255 # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
257 check_results
= (not re
.search(feature
, manifest
) for feature
in UNSUPPORTED_FEATURES
)
258 return all(check_results
)
261 def _aria2c_filename(fn
):
262 return fn
if os
.path
.isabs(fn
) else f
'.{os.path.sep}{fn}'
264 def _call_downloader(self
, tmpfilename
, info_dict
):
265 # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931
266 if False and 'no-external-downloader-progress' not in self
.params
.get('compat_opts', []):
267 info_dict
['__rpc'] = {
268 'port': find_available_port() or 19190,
269 'secret': str(uuid
.uuid4()),
271 return super()._call
_downloader
(tmpfilename
, info_dict
)
273 def _make_cmd(self
, tmpfilename
, info_dict
):
274 cmd
= [self
.exe
, '-c',
275 '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
276 '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
277 if 'fragments' in info_dict
:
278 cmd
+= ['--allow-overwrite=true', '--allow-piece-length-change=true']
280 cmd
+= ['--min-split-size', '1M']
282 if info_dict
.get('http_headers') is not None:
283 for key
, val
in info_dict
['http_headers'].items():
284 cmd
+= ['--header', f
'{key}: {val}']
285 cmd
+= self
._option
('--max-overall-download-limit', 'ratelimit')
286 cmd
+= self
._option
('--interface', 'source_address')
287 cmd
+= self
._option
('--all-proxy', 'proxy')
288 cmd
+= self
._bool
_option
('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
289 cmd
+= self
._bool
_option
('--remote-time', 'updatetime', 'true', 'false', '=')
290 cmd
+= self
._bool
_option
('--show-console-readout', 'noprogress', 'false', 'true', '=')
291 cmd
+= self
._configuration
_args
()
293 if '__rpc' in info_dict
:
296 f
'--rpc-listen-port={info_dict["__rpc"]["port"]}',
297 f
'--rpc-secret={info_dict["__rpc"]["secret"]}']
299 # aria2c strips out spaces from the beginning/end of filenames and paths.
300 # We work around this issue by adding a "./" to the beginning of the
301 # filename and relative path, and adding a "/" at the end of the path.
302 # See: https://github.com/yt-dlp/yt-dlp/issues/276
303 # https://github.com/ytdl-org/youtube-dl/issues/20312
304 # https://github.com/aria2/aria2/issues/1373
305 dn
= os
.path
.dirname(tmpfilename
)
307 cmd
+= ['--dir', self
._aria
2c
_filename
(dn
) + os
.path
.sep
]
308 if 'fragments' not in info_dict
:
309 cmd
+= ['--out', self
._aria
2c
_filename
(os
.path
.basename(tmpfilename
))]
310 cmd
+= ['--auto-file-renaming=false']
312 if 'fragments' in info_dict
:
313 cmd
+= ['--file-allocation=none', '--uri-selector=inorder']
314 url_list_file
= '%s.frag.urls' % tmpfilename
316 for frag_index
, fragment
in enumerate(info_dict
['fragments']):
317 fragment_filename
= '%s-Frag%d' % (os
.path
.basename(tmpfilename
), frag_index
)
318 url_list
.append('%s\n\tout=%s' % (fragment
['url'], self
._aria
2c
_filename
(fragment_filename
)))
319 stream
, _
= self
.sanitize_open(url_list_file
, 'wb')
320 stream
.write('\n'.join(url_list
).encode())
322 cmd
+= ['-i', self
._aria
2c
_filename
(url_list_file
)]
324 cmd
+= ['--', info_dict
['url']]
327 def aria2c_rpc(self
, rpc_port
, rpc_secret
, method
, params
=()):
328 # Does not actually need to be UUID, just unique
329 sanitycheck
= str(uuid
.uuid4())
334 'params': [f
'token:{rpc_secret}', *params
],
336 request
= sanitized_Request(
337 f
'http://localhost:{rpc_port}/jsonrpc',
339 'Content-Type': 'application/json',
340 'Content-Length': f
'{len(d)}',
341 'Ytdl-request-proxy': '__noproxy__',
343 with self
.ydl
.urlopen(request
) as r
:
345 assert resp
.get('id') == sanitycheck
, 'Something went wrong with RPC server'
346 return resp
['result']
348 def _call_process(self
, cmd
, info_dict
):
349 if '__rpc' not in info_dict
:
350 return super()._call
_process
(cmd
, info_dict
)
352 send_rpc
= functools
.partial(self
.aria2c_rpc
, info_dict
['__rpc']['port'], info_dict
['__rpc']['secret'])
353 started
= time
.time()
355 fragmented
= 'fragments' in info_dict
356 frag_count
= len(info_dict
['fragments']) if fragmented
else 1
358 'filename': info_dict
.get('_filename'),
359 'status': 'downloading',
361 'downloaded_bytes': 0,
362 'fragment_count': frag_count
if fragmented
else None,
363 'fragment_index': 0 if fragmented
else None,
365 self
._hook
_progress
(status
, info_dict
)
367 def get_stat(key
, *obj
, average
=False):
368 val
= tuple(filter(None, map(float, traverse_obj(obj
, (..., ..., key
))))) or [0]
369 return sum(val
) / (len(val
) if average
else 1)
371 with
Popen(cmd
, text
=True, stdout
=subprocess
.DEVNULL
, stderr
=subprocess
.PIPE
) as p
:
372 # Add a small sleep so that RPC client can receive response,
373 # or the connection stalls infinitely
376 while retval
is None:
377 # We don't use tellStatus as we won't know the GID without reading stdout
378 # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive
379 active
= send_rpc('aria2.tellActive')
380 completed
= send_rpc('aria2.tellStopped', [0, frag_count
])
382 downloaded
= get_stat('totalLength', completed
) + get_stat('completedLength', active
)
383 speed
= get_stat('downloadSpeed', active
)
384 total
= frag_count
* get_stat('totalLength', active
, completed
, average
=True)
385 if total
< downloaded
:
389 'downloaded_bytes': int(downloaded
),
391 'total_bytes': None if fragmented
else total
,
392 'total_bytes_estimate': total
,
393 'eta': (total
- downloaded
) / (speed
or 1),
394 'fragment_index': min(frag_count
, len(completed
) + 1) if fragmented
else None,
395 'elapsed': time
.time() - started
397 self
._hook
_progress
(status
, info_dict
)
399 if not active
and len(completed
) >= frag_count
:
400 send_rpc('aria2.shutdown')
407 return '', p
.stderr
.read(), retval
410 class HttpieFD(ExternalFD
):
411 AVAILABLE_OPT
= '--version'
414 def _make_cmd(self
, tmpfilename
, info_dict
):
415 cmd
= ['http', '--download', '--output', tmpfilename
, info_dict
['url']]
417 if info_dict
.get('http_headers') is not None:
418 for key
, val
in info_dict
['http_headers'].items():
419 cmd
+= [f
'{key}:{val}']
423 class FFmpegFD(ExternalFD
):
424 SUPPORTED_PROTOCOLS
= ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
425 SUPPORTED_FEATURES
= (Features
.TO_STDOUT
, Features
.MULTIPLE_FORMATS
)
def available(cls, path=None):
    """Return the ``available`` attribute of a freshly created ``FFmpegPostProcessor``.

    *path* is accepted but not used here.
    """
    # TODO: Fix path for ffmpeg
    # Fixme: This may be wrong when --ffmpeg-location is used
    postprocessor = FFmpegPostProcessor()
    return postprocessor.available
def on_process_started(self, proc, stdin):
    """Hook for subclasses to override.

    _call_downloader invokes this with the started ffmpeg process and
    that process's stdin.
    """
438 def can_merge_formats(cls
, info_dict
, params
):
440 info_dict
.get('requested_formats')
441 and info_dict
.get('protocol')
442 and not params
.get('allow_unplayable_formats')
443 and 'no-direct-merge' not in params
.get('compat_opts', [])
444 and cls
.can_download(info_dict
))
446 def _call_downloader(self
, tmpfilename
, info_dict
):
447 ffpp
= FFmpegPostProcessor(downloader
=self
)
448 if not ffpp
.available
:
449 self
.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
453 args
= [ffpp
.executable
, '-y']
455 for log_level
in ('quiet', 'verbose'):
456 if self
.params
.get(log_level
, False):
457 args
+= ['-loglevel', log_level
]
459 if not self
.params
.get('verbose'):
460 args
+= ['-hide_banner']
462 args
+= traverse_obj(info_dict
, ('downloader_options', 'ffmpeg_args'), default
=[])
464 # These exist only for compatibility. Extractors should use
465 # info_dict['downloader_options']['ffmpeg_args'] instead
466 args
+= info_dict
.get('_ffmpeg_args') or []
467 seekable
= info_dict
.get('_seekable')
468 if seekable
is not None:
469 # setting -seekable prevents ffmpeg from guessing if the server
470 # supports seeking(by adding the header `Range: bytes=0-`), which
471 # can cause problems in some cases
472 # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
473 # http://trac.ffmpeg.org/ticket/6125#comment:10
474 args
+= ['-seekable', '1' if seekable
else '0']
477 proxy
= self
.params
.get('proxy')
479 if not re
.match(r
'^[\da-zA-Z]+://', proxy
):
480 proxy
= 'http://%s' % proxy
482 if proxy
.startswith('socks'):
484 '%s does not support SOCKS proxies. Downloading is likely to fail. '
485 'Consider adding --hls-prefer-native to your command.' % self
.get_basename())
487 # Since December 2015 ffmpeg supports -http_proxy option (see
488 # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
489 # We could switch to the following code if we are able to detect version properly
490 # args += ['-http_proxy', proxy]
491 env
= os
.environ
.copy()
492 env
['HTTP_PROXY'] = proxy
493 env
['http_proxy'] = proxy
495 protocol
= info_dict
.get('protocol')
497 if protocol
== 'rtmp':
498 player_url
= info_dict
.get('player_url')
499 page_url
= info_dict
.get('page_url')
500 app
= info_dict
.get('app')
501 play_path
= info_dict
.get('play_path')
502 tc_url
= info_dict
.get('tc_url')
503 flash_version
= info_dict
.get('flash_version')
504 live
= info_dict
.get('rtmp_live', False)
505 conn
= info_dict
.get('rtmp_conn')
506 if player_url
is not None:
507 args
+= ['-rtmp_swfverify', player_url
]
508 if page_url
is not None:
509 args
+= ['-rtmp_pageurl', page_url
]
511 args
+= ['-rtmp_app', app
]
512 if play_path
is not None:
513 args
+= ['-rtmp_playpath', play_path
]
514 if tc_url
is not None:
515 args
+= ['-rtmp_tcurl', tc_url
]
516 if flash_version
is not None:
517 args
+= ['-rtmp_flashver', flash_version
]
519 args
+= ['-rtmp_live', 'live']
520 if isinstance(conn
, list):
522 args
+= ['-rtmp_conn', entry
]
523 elif isinstance(conn
, str):
524 args
+= ['-rtmp_conn', conn
]
526 start_time
, end_time
= info_dict
.get('section_start') or 0, info_dict
.get('section_end')
528 selected_formats
= info_dict
.get('requested_formats') or [info_dict
]
529 for i
, fmt
in enumerate(selected_formats
):
530 if fmt
.get('http_headers') and re
.match(r
'^https?://', fmt
['url']):
531 # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
532 # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
533 args
.extend(['-headers', ''.join(f
'{key}: {val}\r\n' for key
, val
in fmt
['http_headers'].items())])
536 args
+= ['-ss', str(start_time
)]
538 args
+= ['-t', str(end_time
- start_time
)]
540 args
+= self
._configuration
_args
((f
'_i{i + 1}', '_i')) + ['-i', fmt
['url']]
542 if not (start_time
or end_time
) or not self
.params
.get('force_keyframes_at_cuts'):
543 args
+= ['-c', 'copy']
545 if info_dict
.get('requested_formats') or protocol
== 'http_dash_segments':
546 for i
, fmt
in enumerate(selected_formats
):
547 stream_number
= fmt
.get('manifest_stream_number', 0)
548 args
.extend(['-map', f
'{i}:{stream_number}'])
550 if self
.params
.get('test', False):
551 args
+= ['-fs', str(self
._TEST
_FILE
_SIZE
)]
553 ext
= info_dict
['ext']
554 if protocol
in ('m3u8', 'm3u8_native'):
555 use_mpegts
= (tmpfilename
== '-') or self
.params
.get('hls_use_mpegts')
556 if use_mpegts
is None:
557 use_mpegts
= info_dict
.get('is_live')
559 args
+= ['-f', 'mpegts']
561 args
+= ['-f', 'mp4']
562 if (ffpp
.basename
== 'ffmpeg' and ffpp
._features
.get('needs_adtstoasc')) and (not info_dict
.get('acodec') or info_dict
['acodec'].split('.')[0] in ('aac', 'mp4a')):
563 args
+= ['-bsf:a', 'aac_adtstoasc']
564 elif protocol
== 'rtmp':
565 args
+= ['-f', 'flv']
566 elif ext
== 'mp4' and tmpfilename
== '-':
567 args
+= ['-f', 'mpegts']
568 elif ext
== 'unknown_video':
569 ext
= determine_ext(remove_end(tmpfilename
, '.part'))
570 if ext
== 'unknown_video':
572 'The video format is unknown and cannot be downloaded by ffmpeg. '
573 'Explicitly set the extension in the filename to attempt download in that format')
575 self
.report_warning(f
'The video format is unknown. Trying to download as {ext} according to the filename')
576 args
+= ['-f', EXT_TO_OUT_FORMATS
.get(ext
, ext
)]
578 args
+= ['-f', EXT_TO_OUT_FORMATS
.get(ext
, ext
)]
580 args
+= self
._configuration
_args
(('_o1', '_o', ''))
582 args
= [encodeArgument(opt
) for opt
in args
]
583 args
.append(encodeFilename(ffpp
._ffmpeg
_filename
_argument
(tmpfilename
), True))
584 self
._debug
_cmd
(args
)
586 piped
= any(fmt
['url'] in ('-', 'pipe:') for fmt
in selected_formats
)
587 with
Popen(args
, stdin
=subprocess
.PIPE
, env
=env
) as proc
:
589 self
.on_process_started(proc
, proc
.stdin
)
592 except BaseException
as e
:
593 # subprocess.run would send the SIGKILL signal to ffmpeg and the
594 # mp4 file couldn't be played, but if we ask ffmpeg to quit it
595 # produces a file that is playable (this is mostly useful for live
596 # streams). Note that Windows is not affected and produces playable
597 # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
598 if isinstance(e
, KeyboardInterrupt) and sys
.platform
!= 'win32' and not piped
:
599 proc
.communicate_or_kill(b
'q')
601 proc
.kill(timeout
=None)
606 class AVconvFD(FFmpegFD
):
611 klass
.get_basename(): klass
612 for name
, klass
in globals().items()
613 if name
.endswith('FD') and name
not in ('ExternalFD', 'FragmentFD')
def list_external_downloaders():
    """Return the keys of ``_BY_NAME`` as a sorted list."""
    names = list(_BY_NAME)
    names.sort()
    return names
621 def get_external_downloader(external_downloader
):
622 """ Given the name of the executable, see whether we support the given downloader """
623 bn
= os
.path
.splitext(os
.path
.basename(external_downloader
))[0]
624 return _BY_NAME
.get(bn
) or next((
625 klass
for klass
in _BY_NAME
.values() if klass
.EXE_NAME
in bn