[ie/soundcloud] Various fixes (#11820)
[yt-dlp.git] / yt_dlp / networking / _websockets.py
blobec55567dae7f590838972d1f96a4e8cda8dd2f19
1 from __future__ import annotations
3 import contextlib
4 import functools
5 import io
6 import logging
7 import ssl
8 import sys
10 from ._helper import (
11 create_connection,
12 create_socks_proxy_socket,
13 make_socks_proxy_opts,
14 select_proxy,
16 from .common import Features, Response, register_rh
17 from .exceptions import (
18 CertificateVerifyError,
19 HTTPError,
20 ProxyError,
21 RequestError,
22 SSLError,
23 TransportError,
25 from .websocket import WebSocketRequestHandler, WebSocketResponse
26 from ..dependencies import websockets
27 from ..socks import ProxyError as SocksProxyError
28 from ..utils import int_or_none
# `websockets` is an optional dependency; refuse to load this module without it
if not websockets:
    raise ImportError('websockets is not installed')

import websockets.version

# Dotted version string turned into a tuple so it can be compared against a minimum version
websockets_version = tuple(int_or_none(part) for part in websockets.version.version.split('.'))
if websockets_version < (13, 0):
    raise ImportError('Only websockets>=13.0 is supported')
39 import websockets.sync.client
40 from websockets.uri import parse_uri
# The websockets Connection only assigns recv_exc and recv_events_exc
# after it has started the thread that handles recv events [1].
# On our CI using PyPy, that thread has occasionally raced ahead and
# tried to read these attributes before they were assigned [2].
# A class-level default gives the attribute lookup something to fall back on.
# NOTE(review): only recv_exc is patched below; presumably recv_events_exc
# is not needed for the supported versions (>=13.0) - confirm.
# 1: https://github.com/python-websockets/websockets/blame/de768cf65e7e2b1a3b67854fb9e08816a5ff7050/src/websockets/sync/connection.py#L93
# 2: "AttributeError: 'ClientConnection' object has no attribute 'recv_events_exc'. Did you mean: 'recv_events'?"
import websockets.sync.connection  # isort: split
with contextlib.suppress(Exception):  # best-effort: never fail the import over this patch
    websockets.sync.connection.Connection.recv_exc = None
class WebsocketsResponseAdapter(WebSocketResponse):
    """
    Response object wrapping an established websockets sync client connection.

    The handshake response (body, headers, status code, reason phrase) of the
    connection is passed to the base response, while send()/recv()/close()
    are forwarded to the underlying connection. Errors raised by the
    connection are translated into the networking exception types.
    """

    def __init__(self, ws: websockets.sync.client.ClientConnection, url):
        """
        @param ws   Connected websockets client connection; its `response`
                    attribute supplies the handshake response data
        @param url  URL the connection was opened for
        """
        super().__init__(
            # The handshake response may carry no body; fall back to empty bytes
            fp=io.BytesIO(ws.response.body or b''),
            url=url,
            headers=ws.response.headers,
            status=ws.response.status_code,
            reason=ws.response.reason_phrase,
        )
        self._ws = ws

    def close(self):
        """Close the websocket connection, then the base response."""
        try:
            self._ws.close()
        finally:
            # Always release the base response, even if closing the connection fails
            super().close()

    def send(self, message):
        """
        Send a message over the connection.

        @raises ProxyError      on a SOCKS proxy failure
        @raises TransportError  on a websockets, runtime or timeout failure
        @raises RequestError    if the message is of an unsupported type (TypeError)
        """
        # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send
        try:
            return self._ws.send(message)
        # Proxy errors are checked first (same order as in recv) so that they
        # cannot be absorbed by the broader transport error handler
        except SocksProxyError as e:
            raise ProxyError(cause=e) from e
        except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
            raise TransportError(cause=e) from e
        except TypeError as e:
            raise RequestError(cause=e) from e

    def recv(self):
        """
        Receive the next message from the connection.

        @raises ProxyError      on a SOCKS proxy failure
        @raises TransportError  on a websockets, runtime or timeout failure
        """
        # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv
        try:
            return self._ws.recv()
        except SocksProxyError as e:
            raise ProxyError(cause=e) from e
        except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
            raise TransportError(cause=e) from e
@register_rh
class WebsocketsRH(WebSocketRequestHandler):
    """
    Websockets request handler
    https://websockets.readthedocs.io
    https://github.com/python-websockets/websockets

    Opens ws/wss connections (optionally through a SOCKS proxy) using the
    websockets sync client and returns them wrapped in a WebsocketsResponseAdapter.
    """
    _SUPPORTED_URL_SCHEMES = ('wss', 'ws')
    _SUPPORTED_PROXY_SCHEMES = ('socks4', 'socks4a', 'socks5', 'socks5h')
    _SUPPORTED_FEATURES = (Features.ALL_PROXY, Features.NO_PROXY)
    RH_NAME = 'websockets'

    def __init__(self, *args, **kwargs):
        """Initialise the handler and attach a stdout log handler to the websockets loggers."""
        super().__init__(*args, **kwargs)
        # Logger name -> handler we attached, so that close() can detach exactly these
        self.__logging_handlers = {}
        for name in ('websockets.client', 'websockets.server'):
            logger = logging.getLogger(name)
            handler = logging.StreamHandler(stream=sys.stdout)
            handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s'))
            self.__logging_handlers[name] = handler
            logger.addHandler(handler)
            if self.verbose:
                logger.setLevel(logging.DEBUG)

    def _check_extensions(self, extensions):
        """
        Run the base class check, then remove the extensions handled here.

        NOTE(review): presumably anything left in `extensions` afterwards is
        treated as unsupported by the caller - confirm against the base class.
        """
        super()._check_extensions(extensions)
        extensions.pop('timeout', None)
        extensions.pop('cookiejar', None)
        extensions.pop('legacy_ssl', None)

    def close(self):
        """Detach the log handlers that __init__ attached to the websockets loggers."""
        # Remove the logging handler that contains a reference to our logger
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        for name, handler in self.__logging_handlers.items():
            logging.getLogger(name).removeHandler(handler)

    def _send(self, request):
        """
        Open a websocket connection for `request`.

        @param request  Request to open the connection for; its url, headers
                        and extensions are used
        @returns        WebsocketsResponseAdapter wrapping the open connection
        @raises ProxyError              on a SOCKS proxy failure
        @raises RequestError            if the URL is not a valid websocket URI
        @raises CertificateVerifyError  if certificate verification fails
        @raises SSLError                on any other SSL failure
        @raises HTTPError               if the handshake is answered with an invalid status
        @raises TransportError          on any other OS, timeout or websockets failure
        """
        timeout = self._calculate_timeout(request)
        headers = self._merge_headers(request.headers)
        # An explicitly given cookie header takes precedence over the cookiejar
        if 'cookie' not in headers:
            cookiejar = self._get_cookiejar(request)
            cookie_header = cookiejar.get_cookie_header(request.url)
            if cookie_header:
                headers['cookie'] = cookie_header

        # parse_uri() raises InvalidURI itself; translate it here, since it
        # would otherwise escape untranslated from outside the try block below
        try:
            wsuri = parse_uri(request.url)
        except websockets.exceptions.InvalidURI as e:
            raise RequestError(cause=e) from e

        create_conn_kwargs = {
            'source_address': (self.source_address, 0) if self.source_address else None,
            'timeout': timeout,
        }
        proxy = select_proxy(request.url, self._get_proxies(request))
        try:
            if proxy:
                # Connect to the proxy and let it establish the tunnel to the target host
                socks_proxy_options = make_socks_proxy_opts(proxy)
                sock = create_connection(
                    address=(socks_proxy_options['addr'], socks_proxy_options['port']),
                    _create_socket_func=functools.partial(
                        create_socks_proxy_socket, (wsuri.host, wsuri.port), socks_proxy_options),
                    **create_conn_kwargs,
                )
            else:
                sock = create_connection(
                    address=(wsuri.host, wsuri.port),
                    **create_conn_kwargs,
                )
            ssl_ctx = self._make_sslcontext(legacy_ssl_support=request.extensions.get('legacy_ssl'))
            conn = websockets.sync.client.connect(
                sock=sock,
                uri=request.url,
                additional_headers=headers,
                open_timeout=timeout,
                user_agent_header=None,
                ssl=ssl_ctx if wsuri.secure else None,
                close_timeout=0,  # not ideal, but prevents yt-dlp hanging
            )
            return WebsocketsResponseAdapter(conn, url=request.url)

        # Exceptions as per https://websockets.readthedocs.io/en/stable/reference/sync/client.html
        except SocksProxyError as e:
            raise ProxyError(cause=e) from e
        except websockets.exceptions.InvalidURI as e:
            raise RequestError(cause=e) from e
        except ssl.SSLCertVerificationError as e:
            raise CertificateVerifyError(cause=e) from e
        except ssl.SSLError as e:
            raise SSLError(cause=e) from e
        except websockets.exceptions.InvalidStatus as e:
            raise HTTPError(
                Response(
                    # Same empty-body fallback as WebsocketsResponseAdapter uses
                    fp=io.BytesIO(e.response.body or b''),
                    url=request.url,
                    headers=e.response.headers,
                    status=e.response.status_code,
                    reason=e.response.reason_phrase),
            ) from e
        except (OSError, TimeoutError, websockets.exceptions.WebSocketException) as e:
            raise TransportError(cause=e) from e