from __future__ import annotations

import contextlib
import functools
import http.client
import logging
import re
import socket
import warnings

from ..dependencies import brotli, requests, urllib3
from ..utils import bug_reports_message, int_or_none, variadic
from ..utils.networking import normalize_url

if requests is None:
    raise ImportError('requests module is not installed')

if urllib3 is None:
    raise ImportError('urllib3 module is not installed')

urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.'))

if urllib3_version < (1, 26, 17):
    raise ImportError('Only urllib3 >= 1.26.17 is supported')

if requests.__build__ < 0x023202:
    raise ImportError('Only requests >= 2.32.2 is supported')

import requests.adapters
import urllib3.connection
import urllib3.exceptions
import urllib3.util

from ._helper import (
    InstanceStoreMixin,
    add_accept_encoding_header,
    create_connection,
    create_socks_proxy_socket,
    get_redirect_method,
    make_socks_proxy_opts,
    select_proxy,
)
from .common import (
    Features,
    RequestHandler,
    Response,
    register_preference,
    register_rh,
)
from .exceptions import (
    CertificateVerifyError,
    HTTPError,
    IncompleteRead,
    ProxyError,
    RequestError,
    SSLError,
    TransportError,
)
from ..socks import ProxyError as SocksProxyError

SUPPORTED_ENCODINGS = [
    'gzip', 'deflate',
]

if brotli is not None:
    SUPPORTED_ENCODINGS.append('br')
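
# A quick sketch of the resulting request header (illustrative): with brotli
# available, add_accept_encoding_header() in _send() below will advertise
#   Accept-Encoding: gzip, deflate, br
# and urllib3 transparently decodes the compressed body on read().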

"""
Override urllib3's behavior to not convert lower-case percent-encoded characters
to upper-case during the URL normalization process.

RFC 3986 defines that lower- and upper-case percent-encoded hexadecimal characters
are equivalent and that normalizers should convert them to uppercase for consistency [1].

However, some sites may have an incorrect implementation where they provide
a percent-encoded url that is then compared case-sensitively. [2]

While this is a very rare case, since urllib does not do this normalization step, it
is best to avoid it in requests too, for compatibility reasons.

1: https://tools.ietf.org/html/rfc3986#section-2.1
2: https://github.com/streamlink/streamlink/pull/4003
"""


class Urllib3PercentREOverride:
    def __init__(self, r: re.Pattern):
        self.re = r

    # pass through all other attribute calls to the original re
    def __getattr__(self, item):
        return self.re.__getattribute__(item)

    def subn(self, repl, string, *args, **kwargs):
        # Return the string unchanged, but report the real match count so
        # urllib3's callers behave as if the substitution took place.
        return string, self.re.subn(repl, string, *args, **kwargs)[1]
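
# Illustrative behaviour of the override (a sketch, not executed at import time):
#   pattern = Urllib3PercentREOverride(re.compile(r'%[a-fA-F0-9]{2}'))
#   pattern.subn(lambda m: m.group(0).upper(), '%af')  # -> ('%af', 1)
# Plain re.subn would return ('%AF', 1); the override keeps the match count but
# leaves the string untouched, so urllib3 skips the case normalization.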

# urllib3 >= 1.25.8 uses subn:
# https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0
import urllib3.util.url

if hasattr(urllib3.util.url, 'PERCENT_RE'):
    urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE)
elif hasattr(urllib3.util.url, '_PERCENT_RE'):  # urllib3 >= 2.0.0
    urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE)
else:
    warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message())

"""
Workaround for an issue in urllib3.util.ssl_: ssl_wrap_socket does not pass
server_hostname to SSLContext.wrap_socket if server_hostname is an IP,
which is a problem because we set check_hostname to True in our SSLContext.

Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_socket to pass server_hostname regardless.

This has been fixed in urllib3 2.0+.
See: https://github.com/urllib3/urllib3/issues/517
"""

if urllib3_version < (2, 0, 0):
    with contextlib.suppress(Exception):
        urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True


# Requests will not automatically handle no_proxy by default
# due to buggy no_proxy handling with proxy dict [1].
# 1. https://github.com/psf/requests/issues/5000
requests.adapters.select_proxy = select_proxy
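
# Illustrative effect (hypothetical values): with the patched select_proxy, a
# proxies dict such as {'all': 'http://proxy.example:8080', 'no': 'localhost'}
# sends localhost requests directly instead of through the proxy.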


class RequestsResponseAdapter(Response):
    def __init__(self, res: requests.models.Response):
        super().__init__(
            fp=res.raw, headers=res.headers, url=res.url,
            status=res.status_code, reason=res.reason)

        self._requests_response = res

    def read(self, amt: int | None = None):
        try:
            # Interact with the urllib3 response directly.
            return self.fp.read(amt, decode_content=True)

        # See urllib3.response.HTTPResponse.read() for exceptions raised on read
        except urllib3.exceptions.SSLError as e:
            raise SSLError(cause=e) from e

        except urllib3.exceptions.ProtocolError as e:
            # IncompleteRead is always contained within ProtocolError
            # See urllib3.response.HTTPResponse._error_catcher()
            ir_err = next(
                (err for err in (e.__context__, e.__cause__, *variadic(e.args))
                 if isinstance(err, http.client.IncompleteRead)), None)
            if ir_err is not None:
                # `urllib3.exceptions.IncompleteRead` is a subclass of `http.client.IncompleteRead`
                # but uses an `int` for its `partial` property.
                partial = ir_err.partial if isinstance(ir_err.partial, int) else len(ir_err.partial)
                raise IncompleteRead(partial=partial, expected=ir_err.expected) from e
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # catch-all for any other urllib3 response exceptions
            raise TransportError(cause=e) from e
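
# Example of the mapping above (illustrative): if the peer closes the connection
# after sending 512 of an expected 1024 body bytes, urllib3 raises ProtocolError
# wrapping http.client.IncompleteRead(partial=<512 bytes>, expected=512), which
# surfaces here as IncompleteRead(partial=512, expected=512).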


class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
    def __init__(self, ssl_context=None, proxy_ssl_context=None, source_address=None, **kwargs):
        self._pm_args = {}
        if ssl_context:
            self._pm_args['ssl_context'] = ssl_context
        if source_address:
            self._pm_args['source_address'] = (source_address, 0)
        self._proxy_ssl_context = proxy_ssl_context or ssl_context
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        return super().init_poolmanager(*args, **kwargs, **self._pm_args)

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        extra_kwargs = {}
        if not proxy.lower().startswith('socks') and self._proxy_ssl_context:
            extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
        return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)

    # Skip `requests` internal verification; we use our own SSLContext
    def cert_verify(*args, **kwargs):
        pass

    # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
    def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
        url = urllib3.util.parse_url(request.url).url

        manager = self.poolmanager
        if proxy := select_proxy(url, proxies):
            manager = self.proxy_manager_for(proxy)

        return manager.connection_from_url(url)
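
# Minimal usage sketch for the adapter (assumes a caller-provided SSLContext and
# mirrors what RequestsRH._create_instance() does below):
#   import ssl
#   session = requests.Session()
#   session.mount('https://', RequestsHTTPAdapter(ssl_context=ssl.create_default_context()))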


class RequestsSession(requests.sessions.Session):
    """
    Ensure unified redirect method handling with our urllib redirect handler.
    """

    def rebuild_method(self, prepared_request, response):
        new_method = get_redirect_method(prepared_request.method, response.status_code)

        # HACK: requests removes headers/body on redirect unless code was a 307/308.
        if new_method == prepared_request.method:
            response._real_status_code = response.status_code
            response.status_code = 308

        prepared_request.method = new_method

        # Requests fails to resolve dot segments on absolute redirect locations
        # See: https://github.com/yt-dlp/yt-dlp/issues/9020
        # e.g. 'https://example.com/a/../b' should resolve to 'https://example.com/b'
        prepared_request.url = normalize_url(prepared_request.url)

    def rebuild_auth(self, prepared_request, response):
        # HACK: undo the status code change from rebuild_method, if applicable.
        # rebuild_auth runs after requests would remove headers/body based on status code.
        if hasattr(response, '_real_status_code'):
            response.status_code = response._real_status_code
            del response._real_status_code
        return super().rebuild_auth(prepared_request, response)
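
# Illustrative redirect flow (a sketch): on a 302 response to a POST, requests
# would normally drop the headers/body once the method is rewritten. When
# get_redirect_method() decides to keep POST, rebuild_method() above temporarily
# spoofs a 308 so requests preserves them, and rebuild_auth() restores the real
# status code afterwards.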


class Urllib3LoggingFilter(logging.Filter):

    def filter(self, record):
        # Ignore HTTP request messages since HTTPConnection prints those
        return record.msg != '%s://%s:%s "%s %s %s" %s %s'


class Urllib3LoggingHandler(logging.Handler):
    """Redirect urllib3 logs to our logger"""

    def __init__(self, logger, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._logger = logger

    def emit(self, record):
        try:
            msg = self.format(record)
            if record.levelno >= logging.ERROR:
                self._logger.error(msg)
            else:
                self._logger.stdout(msg)

        except Exception:
            self.handleError(record)


@register_rh
class RequestsRH(RequestHandler, InstanceStoreMixin):

    """Requests RequestHandler
    https://github.com/psf/requests
    """
    _SUPPORTED_URL_SCHEMES = ('http', 'https')
    _SUPPORTED_ENCODINGS = tuple(SUPPORTED_ENCODINGS)
    _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
    RH_NAME = 'requests'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Forward urllib3 debug messages to our logger
        logger = logging.getLogger('urllib3')
        self.__logging_handler = Urllib3LoggingHandler(logger=self._logger)
        self.__logging_handler.setFormatter(logging.Formatter('requests: %(message)s'))
        self.__logging_handler.addFilter(Urllib3LoggingFilter())
        logger.addHandler(self.__logging_handler)
        # TODO: Use a logger filter to suppress pool reuse warning instead
        logger.setLevel(logging.ERROR)

        if self.verbose:
            # Setting this globally is not ideal, but is easier than hacking with urllib3.
            # It could technically be problematic for scripts embedding yt-dlp.
            # However, it is unlikely debug traffic is used in that context in a way this will cause problems.
            urllib3.connection.HTTPConnection.debuglevel = 1
            logger.setLevel(logging.DEBUG)
        # this is expected if we are using --no-check-certificate
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    def close(self):
        self._clear_instances()
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        logging.getLogger('urllib3').removeHandler(self.__logging_handler)

    def _check_extensions(self, extensions):
        super()._check_extensions(extensions)
        extensions.pop('cookiejar', None)
        extensions.pop('timeout', None)
        extensions.pop('legacy_ssl', None)

    def _create_instance(self, cookiejar, legacy_ssl_support=None):
        session = RequestsSession()
        http_adapter = RequestsHTTPAdapter(
            ssl_context=self._make_sslcontext(legacy_ssl_support=legacy_ssl_support),
            source_address=self.source_address,
            max_retries=urllib3.util.retry.Retry(False),
        )
        session.adapters.clear()
        session.headers = requests.models.CaseInsensitiveDict({'Connection': 'keep-alive'})
        session.mount('https://', http_adapter)
        session.mount('http://', http_adapter)
        session.cookies = cookiejar
        session.trust_env = False  # no need, we already load proxies from env
        return session

    def _send(self, request):

        headers = self._merge_headers(request.headers)
        add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)

        max_redirects_exceeded = False

        session = self._get_instance(
            cookiejar=self._get_cookiejar(request),
            legacy_ssl_support=request.extensions.get('legacy_ssl'),
        )

        try:
            requests_res = session.request(
                method=request.method,
                url=request.url,
                data=request.data,
                headers=headers,
                timeout=self._calculate_timeout(request),
                proxies=self._get_proxies(request),
                allow_redirects=True,
                stream=True,
            )

        except requests.exceptions.TooManyRedirects as e:
            max_redirects_exceeded = True
            requests_res = e.response

        except requests.exceptions.SSLError as e:
            if 'CERTIFICATE_VERIFY_FAILED' in str(e):
                raise CertificateVerifyError(cause=e) from e
            raise SSLError(cause=e) from e

        except requests.exceptions.ProxyError as e:
            raise ProxyError(cause=e) from e

        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise TransportError(cause=e) from e

        except urllib3.exceptions.HTTPError as e:
            # Catch any urllib3 exceptions that may leak through
            raise TransportError(cause=e) from e

        except requests.exceptions.RequestException as e:
            # Miscellaneous requests exceptions; not necessarily network-related, e.g. InvalidURL
            raise RequestError(cause=e) from e

        res = RequestsResponseAdapter(requests_res)

        if not 200 <= res.status < 300:
            raise HTTPError(res, redirect_loop=max_redirects_exceeded)

        return res


@register_preference(RequestsRH)
def requests_preference(rh, request):
    return 100


# Use our socks proxy implementation with requests to avoid an extra dependency.
class SocksHTTPConnection(urllib3.connection.HTTPConnection):
    def __init__(self, _socks_options, *args, **kwargs):  # must use _socks_options to pass PoolKey checks
        self._proxy_args = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self):
        try:
            return create_connection(
                address=(self._proxy_args['addr'], self._proxy_args['port']),
                timeout=self.timeout,
                source_address=self.source_address,
                _create_socket_func=functools.partial(
                    create_socks_proxy_socket, (self.host, self.port), self._proxy_args))
        except (socket.timeout, TimeoutError) as e:
            raise urllib3.exceptions.ConnectTimeoutError(
                self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
        except SocksProxyError as e:
            raise urllib3.exceptions.ProxyError(str(e), e) from e
        except OSError as e:
            raise urllib3.exceptions.NewConnectionError(
                self, f'Failed to establish a new connection: {e}') from e


class SocksHTTPSConnection(SocksHTTPConnection, urllib3.connection.HTTPSConnection):
    pass


class SocksHTTPConnectionPool(urllib3.HTTPConnectionPool):
    ConnectionCls = SocksHTTPConnection


class SocksHTTPSConnectionPool(urllib3.HTTPSConnectionPool):
    ConnectionCls = SocksHTTPSConnection


class SocksProxyManager(urllib3.PoolManager):

    def __init__(self, socks_proxy, username=None, password=None, num_pools=10, headers=None, **connection_pool_kw):
        connection_pool_kw['_socks_options'] = make_socks_proxy_opts(socks_proxy)
        super().__init__(num_pools, headers, **connection_pool_kw)
        self.pool_classes_by_scheme = {
            'http': SocksHTTPConnectionPool,
            'https': SocksHTTPSConnectionPool,
        }


requests.adapters.SOCKSProxyManager = SocksProxyManager
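
# With SOCKSProxyManager patched in, sessions created by this handler route
# socks proxies through yt-dlp's own implementation (illustrative proxy URL):
#   session.request('GET', url, proxies={'all': 'socks5://127.0.0.1:1080'})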