import contextlib
import functools
import http.client
import logging
import re
import socket
import warnings

from ..dependencies import brotli, requests, urllib3
from ..utils import bug_reports_message, int_or_none, variadic
from ..utils.networking import normalize_url

if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))

if urllib3_version < (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

if requests.__build__ < 0x023100:
    raise ImportError('Only requests >= 2.31.0 is supported')

import requests.adapters
import urllib3.connection
import urllib3.exceptions

from ._helper import (
    InstanceStoreMixin,
    add_accept_encoding_header,
    create_connection,
    create_socks_proxy_socket,
    get_redirect_method,
    make_socks_proxy_opts,
    select_proxy,
)
from .common import (
    Features,
    RequestHandler,
    Response,
    register_preference,
    register_rh,
)
from .exceptions import (
    CertificateVerifyError,
    HTTPError,
    IncompleteRead,
    ProxyError,
    RequestError,
    SSLError,
    TransportError,
)
from ..socks import ProxyError as SocksProxyError

SUPPORTED_ENCODINGS = [
    'gzip', 'deflate',
]

if brotli is not None:
    SUPPORTED_ENCODINGS.append('br')

"""
Override urllib3's behavior to not convert lower-case percent-encoded characters
to upper-case during URL normalization.

RFC 3986 defines lower- and upper-case percent-encoded hexadecimal characters as
equivalent, and normalizers should convert them to upper-case for consistency [1].

However, some sites have an incorrect implementation in which they provide
a percent-encoded URL that is then compared case-sensitively [2].

While this is a very rare case, urllib does not perform this normalization step, so
it is best to avoid it in requests too, for compatibility reasons.

1: https://tools.ietf.org/html/rfc3986#section-2.1
2: https://github.com/streamlink/streamlink/pull/4003
"""


class Urllib3PercentREOverride:
    def __init__(self, r: re.Pattern):
        self.re = r

    # pass through all other attribute calls to the original re
    def __getattr__(self, item):
        return self.re.__getattribute__(item)

    def subn(self, repl, string, *args, **kwargs):
        # Report the match count but return the string unmodified,
        # so urllib3 skips the case-normalizing substitution
        return string, self.re.subn(repl, string, *args, **kwargs)[1]


# urllib3 >= 1.25.8 uses subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url  # noqa: E305

if hasattr(urllib3.util.url, 'PERCENT_RE'):
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())
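
# Illustration: with the override installed, a hypothetical URL such as
# 'https://example.com/a%2fb' keeps its lower-case '%2f' during urllib3's URL
# parsing instead of being rewritten to '%2F', while the match count returned
# by subn() still satisfies urllib3's internal bookkeeping.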

"""
Workaround for an issue in urllib3.util.ssl_: ssl_wrap_socket does not pass
server_hostname to SSLContext.wrap_socket if server_hostname is an IP address,
which is a problem because we set check_hostname to True in our SSLContext.

Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_socket to pass server_hostname regardless.

This has been fixed in urllib3 2.0+.
See: https://github.com/urllib3/urllib3/issues/517
"""

if urllib3_version < (2, 0, 0):
    with contextlib.suppress(Exception):
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True
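
# Rough sketch of the upstream logic this targets (a paraphrase of
# urllib3 < 2.0's ssl_wrap_socket, stated as an assumption, not a quote):
#   send_sni = (server_hostname is not an IP address) or IS_SECURETRANSPORT
# Forcing IS_SECURETRANSPORT to True makes the flag unconditionally truthy, so
# server_hostname is always forwarded and check_hostname stays satisfied.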

# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# 1. https://github.com/psf/requests/issues/5000
requests.adapters.select_proxy = select_proxy
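
# Illustration (hypothetical values): with the patched-in helper,
#   select_proxy('http://example.com/x', {'http': 'http://proxy:3128', 'no_proxy': 'example.com'})
# is expected to return None, honoring no_proxy, whereas requests' own
# implementation can mishandle it (see the issue above).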


class RequestsResponseAdapter(Response):
    def __init__(self, res: requests.models.Response):
        super().__init__(
            fp=res.raw, headers=res.headers, url=res.url,
            status=res.status_code, reason=res.reason)

        self._requests_response = res

    def read(self, amt: int = None):
        try:
            # Interact with urllib3 response directly.
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.ProtocolError as e:
            # IncompleteRead is always contained within ProtocolError
            # See urllib3.response.HTTPResponse._error_catcher()
            ir_err = next(
                (err for err in (e.__context__, e.__cause__, *variadic(e.args))
                 if isinstance(err, http.client.IncompleteRead)), None)
            if ir_err is not None:
                # `urllib3.exceptions.IncompleteRead` is a subclass of `http.client.IncompleteRead`
                # but uses an `int` for its `partial` property.
                partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial)
                raise IncompleteRead(partial=partial, expected=ir_err.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
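
# Illustration of the normalization above: http.client.IncompleteRead stores the
# received body as bytes (e.g. partial=b'abc' -> len() gives 3), while urllib3's
# subclass already stores the byte count, so both end up as an int partial.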


class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        self._pm_args = {}
        if ssl_context:
            self._pm_args['ssl_context'] = ssl_context
        if source_address:
            self._pm_args['source_address'] = (source_address, 0)
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        extra_kwargs = {}
        if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    def cert_verify(*args, **kwargs):
        # lean on SSLContext for cert verification
        pass


class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        new_method = get_redirect_method(prepared_request.method, response.status_code)

        # HACK: requests removes headers/body on redirect unless code was a 307/308.
        if new_method == prepared_request.method:
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = new_method

        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        prepared_request.url = normalize_url(prepared_request.url)

    def rebuild_auth(self, prepared_request, response):
        # HACK: undo status code change from rebuild_method, if applicable.
        # rebuild_auth runs after requests would remove headers/body based on status code.
        if hasattr(response, '_real_status_code'):
            response.status_code = response._real_status_code
            del response._real_status_code
        return super().rebuild_auth(prepared_request, response)
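
# Illustration: for a hypothetical 302 response to a POST that our redirect
# handler decides should stay a POST, requests would normally strip the body
# and headers; temporarily reporting the status as 308 preserves them, and
# rebuild_auth then restores the real status code (302).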


class Urllib3LoggingFilter(logging.Filter):

    def filter(self, record):
        # Ignore HTTP request messages since HTTPConnection prints those
        if record.msg == '%s://%s:%s "%s %s %s" %s %s':
            return False
        return True
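
# Illustration: urllib3's connection pool logs each request with exactly that
# format string (rendering as e.g. 'https://example.com:443 "GET /path HTTP/1.1" 200 1234'),
# so such records are dropped while all other urllib3 records pass through.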


class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._logger = logger

    def emit(self, record):
        try:
            msg = self.format(record)
            if record.levelno >= logging.ERROR:
                self._logger.error(msg)
            else:
                self._logger.stdout(msg)
        except Exception:
            self.handleError(record)


@register_rh
class RequestsRH(RequestHandler, InstanceStoreMixin):

    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    RH_NAME = 'requests'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        logger = logging.getLogger('urllib3')
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
        logger.addHandler(self.__logging_handler)
        # TODO: Use a logger filter to suppress pool reuse warning instead
        logger.setLevel(logging.ERROR)

        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            logger.setLevel(logging.DEBUG)
        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        self._clear_instances()
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        super()._check_extensions(extensions)
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)

    def _create_instance(self, cookiejar):
        session = RequestsSession()
        http_adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(),
            source_address=self.source_address,
            max_retries=urllib3.util.retry.Retry(False),
        )
        session.adapters.clear()
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        session.mount('https://', http_adapter)
        session.mount('http://', http_adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session

    def _send(self, request):

        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        max_redirects_exceeded = False

        session = self._get_instance(
            cookiejar=request.extensions.get('cookiejar') or self.cookiejar)

        try:
            requests_res = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=float(request.extensions.get('timeout') or self.timeout),
                proxies=request.proxies or self.proxies,
                allow_redirects=True,
                stream=True,
            )

        except requests.exceptions.TooManyRedirects as e:
            max_redirects_exceeded = True
            requests_res = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous Requests exceptions. May not necessarily be network related, e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(requests_res)

        if not 200 <= res.status < 300:
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)

        return res
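
# Usage sketch (illustrative only; in practice yt-dlp's RequestDirector
# constructs and dispatches to registered handlers, and the exact
# RequestHandler kwargs shown here are an assumption):
#   from .common import Request
#   rh = RequestsRH(logger=logger, verbose=False)
#   res = rh.send(Request('https://example.com'))
#   body = res.read()
#   rh.close()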


@register_preference(RequestsRH)
def requests_preference(rh, request):
    return 100


# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        try:
            return create_connection(
                address=(self._proxy_args['addr'], self._proxy_args['port']),
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=functools.partial(
                    create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e


class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    pass


class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    ConnectionCls = SocksHTTPConnection


class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    ConnectionCls = SocksHTTPSConnection


class SocksProxyManager(urllib3.PoolManager):

    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
        super().__init__(num_pools, headers, **connection_pool_kw)
        self.pool_classes_by_scheme = {
            'http': SocksHTTPConnectionPool,
            'https': SocksHTTPSConnectionPool,
        }


requests.adapters.SOCKSProxyManager = SocksProxyManager
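
# Illustration of the option flow (hypothetical proxy URL): for
# 'socks5://user:pass@127.0.0.1:1080', make_socks_proxy_opts() produces the
# proxy address, port, version and credentials; that dict travels through
# connection_pool_kw as '_socks_options' into SocksHTTPConnection, whose
# _new_conn() tunnels each pooled connection through the SOCKS proxy.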