[ie/dplay] Fix extractors (#10471)
[yt-dlp3.git] / test / test_networking.py
blob826f11a5614f160665cf87ad9a5d3cf62beb2b65
1 #!/usr/bin/env python3
3 # Allow direct execution
4 import os
5 import sys
7 import pytest
9 from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
11 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
13 import gzip
14 import http.client
15 import http.cookiejar
16 import http.server
17 import io
18 import logging
19 import pathlib
20 import random
21 import ssl
22 import tempfile
23 import threading
24 import time
25 import urllib.error
26 import urllib.request
27 import warnings
28 import zlib
29 from email.message import Message
30 from http.cookiejar import CookieJar
32 from test.helper import (
33 FakeYDL,
34 http_server_port,
35 validate_and_send,
36 verify_address_availability,
38 from yt_dlp.cookies import YoutubeDLCookieJar
39 from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
40 from yt_dlp.networking import (
41 HEADRequest,
42 PUTRequest,
43 Request,
44 RequestDirector,
45 RequestHandler,
46 Response,
48 from yt_dlp.networking._urllib import UrllibRH
49 from yt_dlp.networking.exceptions import (
50 CertificateVerifyError,
51 HTTPError,
52 IncompleteRead,
53 NoSupportingHandlers,
54 ProxyError,
55 RequestError,
56 SSLError,
57 TransportError,
58 UnsupportedRequest,
60 from yt_dlp.networking.impersonate import (
61 ImpersonateRequestHandler,
62 ImpersonateTarget,
64 from yt_dlp.utils import YoutubeDLError
65 from yt_dlp.utils._utils import _YDLLogger as FakeLogger
66 from yt_dlp.utils.networking import HTTPHeaderDict, std_headers
68 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
71 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
72 protocol_version = 'HTTP/1.1'
73 default_request_version = 'HTTP/1.1'
75 def log_message(self, format, *args):
76 pass
78 def _headers(self):
79 payload = str(self.headers).encode()
80 self.send_response(200)
81 self.send_header('Content-Type', 'application/json')
82 self.send_header('Content-Length', str(len(payload)))
83 self.end_headers()
84 self.wfile.write(payload)
86 def _redirect(self):
87 self.send_response(int(self.path[len('/redirect_'):]))
88 self.send_header('Location', '/method')
89 self.send_header('Content-Length', '0')
90 self.end_headers()
92 def _method(self, method, payload=None):
93 self.send_response(200)
94 self.send_header('Content-Length', str(len(payload or '')))
95 self.send_header('Method', method)
96 self.end_headers()
97 if payload:
98 self.wfile.write(payload)
100 def _status(self, status):
101 payload = f'<html>{status} NOT FOUND</html>'.encode()
102 self.send_response(int(status))
103 self.send_header('Content-Type', 'text/html; charset=utf-8')
104 self.send_header('Content-Length', str(len(payload)))
105 self.end_headers()
106 self.wfile.write(payload)
108 def _read_data(self):
109 if 'Content-Length' in self.headers:
110 return self.rfile.read(int(self.headers['Content-Length']))
111 else:
112 return b''
114 def do_POST(self):
115 data = self._read_data() + str(self.headers).encode()
116 if self.path.startswith('/redirect_'):
117 self._redirect()
118 elif self.path.startswith('/method'):
119 self._method('POST', data)
120 elif self.path.startswith('/headers'):
121 self._headers()
122 else:
123 self._status(404)
125 def do_HEAD(self):
126 if self.path.startswith('/redirect_'):
127 self._redirect()
128 elif self.path.startswith('/method'):
129 self._method('HEAD')
130 else:
131 self._status(404)
133 def do_PUT(self):
134 data = self._read_data() + str(self.headers).encode()
135 if self.path.startswith('/redirect_'):
136 self._redirect()
137 elif self.path.startswith('/method'):
138 self._method('PUT', data)
139 else:
140 self._status(404)
142 def do_GET(self):
143 if self.path == '/video.html':
144 payload = b'<html><video src="/vid.mp4" /></html>'
145 self.send_response(200)
146 self.send_header('Content-Type', 'text/html; charset=utf-8')
147 self.send_header('Content-Length', str(len(payload)))
148 self.end_headers()
149 self.wfile.write(payload)
150 elif self.path == '/vid.mp4':
151 payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
152 self.send_response(200)
153 self.send_header('Content-Type', 'video/mp4')
154 self.send_header('Content-Length', str(len(payload)))
155 self.end_headers()
156 self.wfile.write(payload)
157 elif self.path == '/%E4%B8%AD%E6%96%87.html':
158 payload = b'<html><video src="/vid.mp4" /></html>'
159 self.send_response(200)
160 self.send_header('Content-Type', 'text/html; charset=utf-8')
161 self.send_header('Content-Length', str(len(payload)))
162 self.end_headers()
163 self.wfile.write(payload)
164 elif self.path == '/%c7%9f':
165 payload = b'<html><video src="/vid.mp4" /></html>'
166 self.send_response(200)
167 self.send_header('Content-Type', 'text/html; charset=utf-8')
168 self.send_header('Content-Length', str(len(payload)))
169 self.end_headers()
170 self.wfile.write(payload)
171 elif self.path.startswith('/redirect_loop'):
172 self.send_response(301)
173 self.send_header('Location', self.path)
174 self.send_header('Content-Length', '0')
175 self.end_headers()
176 elif self.path == '/redirect_dotsegments':
177 self.send_response(301)
178 # redirect to /headers but with dot segments before
179 self.send_header('Location', '/a/b/./../../headers')
180 self.send_header('Content-Length', '0')
181 self.end_headers()
182 elif self.path == '/redirect_dotsegments_absolute':
183 self.send_response(301)
184 # redirect to /headers but with dot segments before - absolute url
185 self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
186 self.send_header('Content-Length', '0')
187 self.end_headers()
188 elif self.path.startswith('/redirect_'):
189 self._redirect()
190 elif self.path.startswith('/method'):
191 self._method('GET', str(self.headers).encode())
192 elif self.path.startswith('/headers'):
193 self._headers()
194 elif self.path.startswith('/308-to-headers'):
195 self.send_response(308)
196 # redirect to "localhost" for testing cookie redirection handling
197 self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers')
198 self.send_header('Content-Length', '0')
199 self.end_headers()
200 elif self.path == '/trailing_garbage':
201 payload = b'<html><video src="/vid.mp4" /></html>'
202 self.send_response(200)
203 self.send_header('Content-Type', 'text/html; charset=utf-8')
204 self.send_header('Content-Encoding', 'gzip')
205 buf = io.BytesIO()
206 with gzip.GzipFile(fileobj=buf, mode='wb') as f:
207 f.write(payload)
208 compressed = buf.getvalue() + b'trailing garbage'
209 self.send_header('Content-Length', str(len(compressed)))
210 self.end_headers()
211 self.wfile.write(compressed)
212 elif self.path == '/302-non-ascii-redirect':
213 new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
214 self.send_response(301)
215 self.send_header('Location', new_url)
216 self.send_header('Content-Length', '0')
217 self.end_headers()
218 elif self.path == '/content-encoding':
219 encodings = self.headers.get('ytdl-encoding', '')
220 payload = b'<html><video src="/vid.mp4" /></html>'
221 for encoding in filter(None, (e.strip() for e in encodings.split(','))):
222 if encoding == 'br' and brotli:
223 payload = brotli.compress(payload)
224 elif encoding == 'gzip':
225 buf = io.BytesIO()
226 with gzip.GzipFile(fileobj=buf, mode='wb') as f:
227 f.write(payload)
228 payload = buf.getvalue()
229 elif encoding == 'deflate':
230 payload = zlib.compress(payload)
231 elif encoding == 'unsupported':
232 payload = b'raw'
233 break
234 else:
235 self._status(415)
236 return
237 self.send_response(200)
238 self.send_header('Content-Encoding', encodings)
239 self.send_header('Content-Length', str(len(payload)))
240 self.end_headers()
241 self.wfile.write(payload)
242 elif self.path.startswith('/gen_'):
243 payload = b'<html></html>'
244 self.send_response(int(self.path[len('/gen_'):]))
245 self.send_header('Content-Type', 'text/html; charset=utf-8')
246 self.send_header('Content-Length', str(len(payload)))
247 self.end_headers()
248 self.wfile.write(payload)
249 elif self.path.startswith('/incompleteread'):
250 payload = b'<html></html>'
251 self.send_response(200)
252 self.send_header('Content-Type', 'text/html; charset=utf-8')
253 self.send_header('Content-Length', '234234')
254 self.end_headers()
255 self.wfile.write(payload)
256 self.finish()
257 elif self.path.startswith('/timeout_'):
258 time.sleep(int(self.path[len('/timeout_'):]))
259 self._headers()
260 elif self.path == '/source_address':
261 payload = str(self.client_address[0]).encode()
262 self.send_response(200)
263 self.send_header('Content-Type', 'text/html; charset=utf-8')
264 self.send_header('Content-Length', str(len(payload)))
265 self.end_headers()
266 self.wfile.write(payload)
267 self.finish()
268 elif self.path == '/get_cookie':
269 self.send_response(200)
270 self.send_header('Set-Cookie', 'test=ytdlp; path=/')
271 self.end_headers()
272 self.finish()
273 else:
274 self._status(404)
276 def send_header(self, keyword, value):
278 Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
279 This is against what is defined in RFC 3986, however we need to test we support this
280 since some sites incorrectly do this.
282 if keyword.lower() == 'connection':
283 return super().send_header(keyword, value)
285 if not hasattr(self, '_headers_buffer'):
286 self._headers_buffer = []
288 self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
291 class TestRequestHandlerBase:
292 @classmethod
293 def setup_class(cls):
294 cls.http_httpd = http.server.ThreadingHTTPServer(
295 ('127.0.0.1', 0), HTTPTestRequestHandler)
296 cls.http_port = http_server_port(cls.http_httpd)
297 cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
298 # FIXME: we should probably stop the http server thread after each test
299 # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
300 cls.http_server_thread.daemon = True
301 cls.http_server_thread.start()
303 # HTTPS server
304 certfn = os.path.join(TEST_DIR, 'testcert.pem')
305 cls.https_httpd = http.server.ThreadingHTTPServer(
306 ('127.0.0.1', 0), HTTPTestRequestHandler)
307 sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
308 sslctx.load_cert_chain(certfn, None)
309 cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
310 cls.https_port = http_server_port(cls.https_httpd)
311 cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
312 cls.https_server_thread.daemon = True
313 cls.https_server_thread.start()
316 @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
317 class TestHTTPRequestHandler(TestRequestHandlerBase):
319 def test_verify_cert(self, handler):
320 with handler() as rh:
321 with pytest.raises(CertificateVerifyError):
322 validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
324 with handler(verify=False) as rh:
325 r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
326 assert r.status == 200
327 r.close()
329 def test_ssl_error(self, handler):
330 # HTTPS server with too old TLS version
331 # XXX: is there a better way to test this than to create a new server?
332 https_httpd = http.server.ThreadingHTTPServer(
333 ('127.0.0.1', 0), HTTPTestRequestHandler)
334 sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
335 https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
336 https_port = http_server_port(https_httpd)
337 https_server_thread = threading.Thread(target=https_httpd.serve_forever)
338 https_server_thread.daemon = True
339 https_server_thread.start()
341 with handler(verify=False) as rh:
342 with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info:
343 validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
344 assert not issubclass(exc_info.type, CertificateVerifyError)
346 @pytest.mark.skip_handler('CurlCFFI', 'legacy_ssl ignored by CurlCFFI')
347 def test_legacy_ssl_extension(self, handler):
348 # HTTPS server with old ciphers
349 # XXX: is there a better way to test this than to create a new server?
350 https_httpd = http.server.ThreadingHTTPServer(
351 ('127.0.0.1', 0), HTTPTestRequestHandler)
352 sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
353 sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
354 sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
355 sslctx.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
356 https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
357 https_port = http_server_port(https_httpd)
358 https_server_thread = threading.Thread(target=https_httpd.serve_forever)
359 https_server_thread.daemon = True
360 https_server_thread.start()
362 with handler(verify=False) as rh:
363 res = validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers', extensions={'legacy_ssl': True}))
364 assert res.status == 200
365 res.close()
367 # Ensure only applies to request extension
368 with pytest.raises(SSLError):
369 validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
371 @pytest.mark.skip_handler('CurlCFFI', 'legacy_ssl ignored by CurlCFFI')
372 def test_legacy_ssl_support(self, handler):
373 # HTTPS server with old ciphers
374 # XXX: is there a better way to test this than to create a new server?
375 https_httpd = http.server.ThreadingHTTPServer(
376 ('127.0.0.1', 0), HTTPTestRequestHandler)
377 sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
378 sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
379 sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
380 sslctx.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
381 https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
382 https_port = http_server_port(https_httpd)
383 https_server_thread = threading.Thread(target=https_httpd.serve_forever)
384 https_server_thread.daemon = True
385 https_server_thread.start()
387 with handler(verify=False, legacy_ssl_support=True) as rh:
388 res = validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
389 assert res.status == 200
390 res.close()
392 def test_percent_encode(self, handler):
393 with handler() as rh:
394 # Unicode characters should be encoded with uppercase percent-encoding
395 res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
396 assert res.status == 200
397 res.close()
398 # don't normalize existing percent encodings
399 res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
400 assert res.status == 200
401 res.close()
403 @pytest.mark.parametrize('path', [
404 '/a/b/./../../headers',
405 '/redirect_dotsegments',
406 # https://github.com/yt-dlp/yt-dlp/issues/9020
407 '/redirect_dotsegments_absolute',
409 def test_remove_dot_segments(self, handler, path):
410 with handler(verbose=True) as rh:
411 # This isn't a comprehensive test,
412 # but it should be enough to check whether the handler is removing dot segments in required scenarios
413 res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
414 assert res.status == 200
415 assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
416 res.close()
418 @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)')
419 def test_unicode_path_redirection(self, handler):
420 with handler() as rh:
421 r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
422 assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
423 r.close()
425 def test_raise_http_error(self, handler):
426 with handler() as rh:
427 for bad_status in (400, 500, 599, 302):
428 with pytest.raises(HTTPError):
429 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}'))
431 # Should not raise an error
432 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close()
434 def test_response_url(self, handler):
435 with handler() as rh:
436 # Response url should be that of the last url in redirect chain
437 res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
438 assert res.url == f'http://127.0.0.1:{self.http_port}/method'
439 res.close()
440 res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
441 assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
442 res2.close()
444 # Covers some basic cases we expect some level of consistency between request handlers for
445 @pytest.mark.parametrize('redirect_status,method,expected', [
446 # A 303 must either use GET or HEAD for subsequent request
447 (303, 'POST', ('', 'GET', False)),
448 (303, 'HEAD', ('', 'HEAD', False)),
450 # 301 and 302 turn POST only into a GET
451 (301, 'POST', ('', 'GET', False)),
452 (301, 'HEAD', ('', 'HEAD', False)),
453 (302, 'POST', ('', 'GET', False)),
454 (302, 'HEAD', ('', 'HEAD', False)),
456 # 307 and 308 should not change method
457 (307, 'POST', ('testdata', 'POST', True)),
458 (308, 'POST', ('testdata', 'POST', True)),
459 (307, 'HEAD', ('', 'HEAD', False)),
460 (308, 'HEAD', ('', 'HEAD', False)),
462 def test_redirect(self, handler, redirect_status, method, expected):
463 with handler() as rh:
464 data = b'testdata' if method == 'POST' else None
465 headers = {}
466 if data is not None:
467 headers['Content-Type'] = 'application/test'
468 res = validate_and_send(
469 rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data,
470 headers=headers))
472 headers = b''
473 data_recv = b''
474 if data is not None:
475 data_recv += res.read(len(data))
476 if data_recv != data:
477 headers += data_recv
478 data_recv = b''
480 headers += res.read()
482 assert expected[0] == data_recv.decode()
483 assert expected[1] == res.headers.get('method')
484 assert expected[2] == ('content-length' in headers.decode().lower())
486 def test_request_cookie_header(self, handler):
487 # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
488 with handler() as rh:
489 # Specified Cookie header should be used
490 res = validate_and_send(
491 rh, Request(
492 f'http://127.0.0.1:{self.http_port}/headers',
493 headers={'Cookie': 'test=test'})).read().decode()
494 assert 'cookie: test=test' in res.lower()
496 # Specified Cookie header should be removed on any redirect
497 res = validate_and_send(
498 rh, Request(
499 f'http://127.0.0.1:{self.http_port}/308-to-headers',
500 headers={'Cookie': 'test=test2'})).read().decode()
501 assert 'cookie: test=test2' not in res.lower()
503 # Specified Cookie header should override global cookiejar for that request
504 # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now
505 cookiejar = YoutubeDLCookieJar()
506 cookiejar.set_cookie(http.cookiejar.Cookie(
507 version=0, name='test', value='ytdlp', port=None, port_specified=False,
508 domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
509 path_specified=True, secure=False, expires=None, discard=False, comment=None,
510 comment_url=None, rest={}))
512 with handler(cookiejar=cookiejar) as rh:
513 data = validate_and_send(
514 rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read()
515 assert b'cookie: test=ytdlp' not in data.lower()
516 assert b'cookie: test=test3' in data.lower()
518 def test_redirect_loop(self, handler):
519 with handler() as rh:
520 with pytest.raises(HTTPError, match='redirect loop'):
521 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
523 def test_incompleteread(self, handler):
524 with handler(timeout=2) as rh:
525 with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
526 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read()
528 def test_cookies(self, handler):
529 cookiejar = YoutubeDLCookieJar()
530 cookiejar.set_cookie(http.cookiejar.Cookie(
531 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
532 False, '/headers', True, False, None, False, None, None, {}))
534 with handler(cookiejar=cookiejar) as rh:
535 data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
536 assert b'cookie: test=ytdlp' in data.lower()
538 # Per request
539 with handler() as rh:
540 data = validate_and_send(
541 rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
542 assert b'cookie: test=ytdlp' in data.lower()
544 def test_cookie_sync_only_cookiejar(self, handler):
545 # Ensure that cookies are ONLY being handled by the cookiejar
546 with handler() as rh:
547 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/get_cookie', extensions={'cookiejar': YoutubeDLCookieJar()}))
548 data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': YoutubeDLCookieJar()})).read()
549 assert b'cookie: test=ytdlp' not in data.lower()
551 def test_cookie_sync_delete_cookie(self, handler):
552 # Ensure that cookies are ONLY being handled by the cookiejar
553 cookiejar = YoutubeDLCookieJar()
554 with handler(cookiejar=cookiejar) as rh:
555 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/get_cookie'))
556 data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
557 assert b'cookie: test=ytdlp' in data.lower()
558 cookiejar.clear_session_cookies()
559 data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
560 assert b'cookie: test=ytdlp' not in data.lower()
562 def test_headers(self, handler):
564 with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
565 # Global Headers
566 data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower()
567 assert b'test1: test' in data
569 # Per request headers, merged with global
570 data = validate_and_send(rh, Request(
571 f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower()
572 assert b'test1: test' in data
573 assert b'test2: changed' in data
574 assert b'test2: test2' not in data
575 assert b'test3: test3' in data
577 def test_read_timeout(self, handler):
578 with handler() as rh:
579 # Default timeout is 20 seconds, so this should go through
580 validate_and_send(
581 rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
583 with handler(timeout=0.1) as rh:
584 with pytest.raises(TransportError):
585 validate_and_send(
586 rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5'))
588 # Per request timeout, should override handler timeout
589 validate_and_send(
590 rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
592 def test_connect_timeout(self, handler):
593 # nothing should be listening on this port
594 connect_timeout_url = 'http://10.255.255.255'
595 with handler(timeout=0.01) as rh, pytest.raises(TransportError):
596 now = time.time()
597 validate_and_send(rh, Request(connect_timeout_url))
598 assert time.time() - now < DEFAULT_TIMEOUT
600 # Per request timeout, should override handler timeout
601 request = Request(connect_timeout_url, extensions={'timeout': 0.01})
602 with handler() as rh, pytest.raises(TransportError):
603 now = time.time()
604 validate_and_send(rh, request)
605 assert time.time() - now < DEFAULT_TIMEOUT
607 def test_source_address(self, handler):
608 source_address = f'127.0.0.{random.randint(5, 255)}'
609 # on some systems these loopback addresses we need for testing may not be available
610 # see: https://github.com/yt-dlp/yt-dlp/issues/8890
611 verify_address_availability(source_address)
612 with handler(source_address=source_address) as rh:
613 data = validate_and_send(
614 rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
615 assert source_address == data
617 # Not supported by CurlCFFI
618 @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
619 def test_gzip_trailing_garbage(self, handler):
620 with handler() as rh:
621 data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
622 assert data == '<html><video src="/vid.mp4" /></html>'
624 @pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
625 @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
626 def test_brotli(self, handler):
627 with handler() as rh:
628 res = validate_and_send(
629 rh, Request(
630 f'http://127.0.0.1:{self.http_port}/content-encoding',
631 headers={'ytdl-encoding': 'br'}))
632 assert res.headers.get('Content-Encoding') == 'br'
633 assert res.read() == b'<html><video src="/vid.mp4" /></html>'
635 def test_deflate(self, handler):
636 with handler() as rh:
637 res = validate_and_send(
638 rh, Request(
639 f'http://127.0.0.1:{self.http_port}/content-encoding',
640 headers={'ytdl-encoding': 'deflate'}))
641 assert res.headers.get('Content-Encoding') == 'deflate'
642 assert res.read() == b'<html><video src="/vid.mp4" /></html>'
644 def test_gzip(self, handler):
645 with handler() as rh:
646 res = validate_and_send(
647 rh, Request(
648 f'http://127.0.0.1:{self.http_port}/content-encoding',
649 headers={'ytdl-encoding': 'gzip'}))
650 assert res.headers.get('Content-Encoding') == 'gzip'
651 assert res.read() == b'<html><video src="/vid.mp4" /></html>'
653 def test_multiple_encodings(self, handler):
654 with handler() as rh:
655 for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
656 res = validate_and_send(
657 rh, Request(
658 f'http://127.0.0.1:{self.http_port}/content-encoding',
659 headers={'ytdl-encoding': pair}))
660 assert res.headers.get('Content-Encoding') == pair
661 assert res.read() == b'<html><video src="/vid.mp4" /></html>'
663 @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
664 def test_unsupported_encoding(self, handler):
665 with handler() as rh:
666 res = validate_and_send(
667 rh, Request(
668 f'http://127.0.0.1:{self.http_port}/content-encoding',
669 headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'}))
670 assert res.headers.get('Content-Encoding') == 'unsupported'
671 assert res.read() == b'raw'
673 def test_read(self, handler):
674 with handler() as rh:
675 res = validate_and_send(
676 rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
677 assert res.readable()
678 assert res.read(1) == b'H'
679 assert res.read(3) == b'ost'
680 assert res.read().decode().endswith('\n\n')
681 assert res.read() == b''
683 def test_request_disable_proxy(self, handler):
684 for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
685 # Given the handler is configured with a proxy
686 with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
687 # When a proxy is explicitly set to None for the request
688 res = validate_and_send(
689 rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None}))
690 # Then no proxy should be used
691 res.close()
692 assert res.status == 200
694 @pytest.mark.skip_handlers_if(
695 lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
696 def test_noproxy(self, handler):
697 for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
698 # Given the handler is configured with a proxy
699 with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
700 for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
701 # When request no proxy includes the request url host
702 nop_response = validate_and_send(
703 rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy}))
704 # Then the proxy should not be used
705 assert nop_response.status == 200
706 nop_response.close()
708 @pytest.mark.skip_handlers_if(
709 lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
710 def test_allproxy(self, handler):
711 # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
712 # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
713 with handler(proxies={'all': 'http://10.255.255.255'}, timeout=0.1) as rh:
714 with pytest.raises(TransportError):
715 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close()
717 with handler(timeout=0.1) as rh:
718 with pytest.raises(TransportError):
719 validate_and_send(
720 rh, Request(
721 f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': 'http://10.255.255.255'})).close()
724 @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
725 class TestClientCertificate:
726 @classmethod
727 def setup_class(cls):
728 certfn = os.path.join(TEST_DIR, 'testcert.pem')
729 cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
730 cacertfn = os.path.join(cls.certdir, 'ca.crt')
731 cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
732 sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
733 sslctx.verify_mode = ssl.CERT_REQUIRED
734 sslctx.load_verify_locations(cafile=cacertfn)
735 sslctx.load_cert_chain(certfn, None)
736 cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)
737 cls.port = http_server_port(cls.httpd)
738 cls.server_thread = threading.Thread(target=cls.httpd.serve_forever)
739 cls.server_thread.daemon = True
740 cls.server_thread.start()
742 def _run_test(self, handler, **handler_kwargs):
743 with handler(
744 # Disable client-side validation of unacceptable self-signed testcert.pem
745 # The test is of a check on the server side, so unaffected
746 verify=False,
747 **handler_kwargs,
748 ) as rh:
749 validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
751 def test_certificate_combined_nopass(self, handler):
752 self._run_test(handler, client_cert={
753 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
756 def test_certificate_nocombined_nopass(self, handler):
757 self._run_test(handler, client_cert={
758 'client_certificate': os.path.join(self.certdir, 'client.crt'),
759 'client_certificate_key': os.path.join(self.certdir, 'client.key'),
762 def test_certificate_combined_pass(self, handler):
763 self._run_test(handler, client_cert={
764 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
765 'client_certificate_password': 'foobar',
768 def test_certificate_nocombined_pass(self, handler):
769 self._run_test(handler, client_cert={
770 'client_certificate': os.path.join(self.certdir, 'client.crt'),
771 'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
772 'client_certificate_password': 'foobar',
776 @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
777 class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase):
778 def test_supported_impersonate_targets(self, handler):
779 with handler(headers=std_headers) as rh:
780 # note: this assumes the impersonate request handler supports the impersonate extension
781 for target in rh.supported_targets:
782 res = validate_and_send(rh, Request(
783 f'http://127.0.0.1:{self.http_port}/headers', extensions={'impersonate': target}))
784 assert res.status == 200
785 assert std_headers['user-agent'].lower() not in res.read().decode().lower()
787 def test_response_extensions(self, handler):
788 with handler() as rh:
789 for target in rh.supported_targets:
790 request = Request(
791 f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
792 res = validate_and_send(rh, request)
793 assert res.extensions['impersonate'] == rh._get_request_target(request)
795 def test_http_error_response_extensions(self, handler):
796 with handler() as rh:
797 for target in rh.supported_targets:
798 request = Request(
799 f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
800 try:
801 validate_and_send(rh, request)
802 except HTTPError as e:
803 res = e.response
804 assert res.extensions['impersonate'] == rh._get_request_target(request)
807 class TestRequestHandlerMisc:
808 """Misc generic tests for request handlers, not related to request or validation testing"""
809 @pytest.mark.parametrize('handler,logger_name', [
810 ('Requests', 'urllib3'),
811 ('Websockets', 'websockets.client'),
812 ('Websockets', 'websockets.server'),
813 ], indirect=['handler'])
814 def test_remove_logging_handler(self, handler, logger_name):
815 # Ensure any logging handlers, which may contain a YoutubeDL instance,
816 # are removed when we close the request handler
817 # See: https://github.com/yt-dlp/yt-dlp/issues/8922
818 logging_handlers = logging.getLogger(logger_name).handlers
819 before_count = len(logging_handlers)
820 rh = handler()
821 assert len(logging_handlers) == before_count + 1
822 rh.close()
823 assert len(logging_handlers) == before_count
826 @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
827 class TestUrllibRequestHandler(TestRequestHandlerBase):
828 def test_file_urls(self, handler):
829 # See https://github.com/ytdl-org/youtube-dl/issues/8227
830 tf = tempfile.NamedTemporaryFile(delete=False)
831 tf.write(b'foobar')
832 tf.close()
833 req = Request(pathlib.Path(tf.name).as_uri())
834 with handler() as rh:
835 with pytest.raises(UnsupportedRequest):
836 rh.validate(req)
838 # Test that urllib never loaded FileHandler
839 with pytest.raises(TransportError):
840 rh.send(req)
842 with handler(enable_file_urls=True) as rh:
843 res = validate_and_send(rh, req)
844 assert res.read() == b'foobar'
845 res.close()
847 os.unlink(tf.name)
849 def test_http_error_returns_content(self, handler):
850 # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
851 def get_response():
852 with handler() as rh:
853 # headers url
854 try:
855 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
856 except HTTPError as e:
857 return e.response
859 assert get_response().read() == b'<html></html>'
861 def test_verify_cert_error_text(self, handler):
862 # Check the output of the error message
863 with handler() as rh:
864 with pytest.raises(
865 CertificateVerifyError,
866 match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate',
868 validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
870 @pytest.mark.parametrize('req,match,version_check', [
871 # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
872 # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
874 Request('http://127.0.0.1', method='GET\n'),
875 'method can\'t contain control characters',
876 lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5),
878 # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
879 # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
881 Request('http://127.0.0. 1', method='GET'),
882 'URL can\'t contain control characters',
883 lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3),
885 # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
886 (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
888 def test_httplib_validation_errors(self, handler, req, match, version_check):
889 if version_check and version_check(sys.version_info):
890 pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')
892 with handler() as rh:
893 with pytest.raises(RequestError, match=match) as exc_info:
894 validate_and_send(rh, req)
895 assert not isinstance(exc_info.value, TransportError)
898 @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
899 class TestRequestsRequestHandler(TestRequestHandlerBase):
900 @pytest.mark.parametrize('raised,expected', [
901 (lambda: requests.exceptions.ConnectTimeout(), TransportError),
902 (lambda: requests.exceptions.ReadTimeout(), TransportError),
903 (lambda: requests.exceptions.Timeout(), TransportError),
904 (lambda: requests.exceptions.ConnectionError(), TransportError),
905 (lambda: requests.exceptions.ProxyError(), ProxyError),
906 (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
907 (lambda: requests.exceptions.SSLError(), SSLError),
908 (lambda: requests.exceptions.InvalidURL(), RequestError),
909 (lambda: requests.exceptions.InvalidHeader(), RequestError),
910 # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
911 (lambda: urllib3.exceptions.HTTPError(), TransportError),
912 (lambda: requests.exceptions.RequestException(), RequestError),
913 # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
915 def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
916 with handler() as rh:
917 def mock_get_instance(*args, **kwargs):
918 class MockSession:
919 def request(self, *args, **kwargs):
920 raise raised()
921 return MockSession()
923 monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
925 with pytest.raises(expected) as exc_info:
926 rh.send(Request('http://fake'))
928 assert exc_info.type is expected
930 @pytest.mark.parametrize('raised,expected,match', [
931 (lambda: urllib3.exceptions.SSLError(), SSLError, None),
932 (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
933 (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
934 (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
935 (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
936 (lambda: urllib3.exceptions.HTTPError(), TransportError, None), # catch-all
938 lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
939 IncompleteRead,
940 '3 bytes read, 4 more expected',
943 lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
944 IncompleteRead,
945 '3 bytes read, 5 more expected',
948 def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
949 from requests.models import Response as RequestsResponse
950 from urllib3.response import HTTPResponse as Urllib3Response
952 from yt_dlp.networking._requests import RequestsResponseAdapter
953 requests_res = RequestsResponse()
954 requests_res.raw = Urllib3Response(body=b'', status=200)
955 res = RequestsResponseAdapter(requests_res)
957 def mock_read(*args, **kwargs):
958 raise raised()
959 monkeypatch.setattr(res.fp, 'read', mock_read)
961 with pytest.raises(expected, match=match) as exc_info:
962 res.read()
964 assert exc_info.type is expected
966 def test_close(self, handler, monkeypatch):
967 rh = handler()
968 session = rh._get_instance(cookiejar=rh.cookiejar)
969 called = False
970 original_close = session.close
972 def mock_close(*args, **kwargs):
973 nonlocal called
974 called = True
975 return original_close(*args, **kwargs)
977 monkeypatch.setattr(session, 'close', mock_close)
978 rh.close()
979 assert called
982 @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
983 class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
985 @pytest.mark.parametrize('params,extensions', [
986 ({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
987 ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
989 def test_impersonate(self, handler, params, extensions):
990 with handler(headers=std_headers, **params) as rh:
991 res = validate_and_send(
992 rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions=extensions)).read().decode()
993 assert 'sec-ch-ua: "Chromium";v="110"' in res
994 # Check that user agent is added over ours
995 assert 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36' in res
997 def test_headers(self, handler):
998 with handler(headers=std_headers) as rh:
999 # Ensure curl-impersonate overrides our standard headers (usually added
1000 res = validate_and_send(
1001 rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={
1002 'impersonate': ImpersonateTarget('safari')}, headers={'x-custom': 'test', 'sec-fetch-mode': 'custom'})).read().decode().lower()
1004 assert std_headers['user-agent'].lower() not in res
1005 assert std_headers['accept-language'].lower() not in res
1006 assert std_headers['sec-fetch-mode'].lower() not in res
1007 # other than UA, custom headers that differ from std_headers should be kept
1008 assert 'sec-fetch-mode: custom' in res
1009 assert 'x-custom: test' in res
1010 # but when not impersonating don't remove std_headers
1011 res = validate_and_send(
1012 rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'x-custom': 'test'})).read().decode().lower()
1013 # std_headers should be present
1014 for k, v in std_headers.items():
1015 assert f'{k}: {v}'.lower() in res
1017 @pytest.mark.parametrize('raised,expected,match', [
1018 (lambda: curl_cffi.requests.errors.RequestsError(
1019 '', code=curl_cffi.const.CurlECode.PARTIAL_FILE), IncompleteRead, None),
1020 (lambda: curl_cffi.requests.errors.RequestsError(
1021 '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
1022 (lambda: curl_cffi.requests.errors.RequestsError(
1023 '', code=curl_cffi.const.CurlECode.RECV_ERROR), TransportError, None),
1025 def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
1026 import curl_cffi.requests
1028 from yt_dlp.networking._curlcffi import CurlCFFIResponseAdapter
1029 curl_res = curl_cffi.requests.Response()
1030 res = CurlCFFIResponseAdapter(curl_res)
1032 def mock_read(*args, **kwargs):
1033 try:
1034 raise raised()
1035 except Exception as e:
1036 e.response = curl_res
1037 raise
1038 monkeypatch.setattr(res.fp, 'read', mock_read)
1040 with pytest.raises(expected, match=match) as exc_info:
1041 res.read()
1043 assert exc_info.type is expected
1045 @pytest.mark.parametrize('raised,expected,match', [
1046 (lambda: curl_cffi.requests.errors.RequestsError(
1047 '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
1048 (lambda: curl_cffi.requests.errors.RequestsError(
1049 '', code=curl_cffi.const.CurlECode.PEER_FAILED_VERIFICATION), CertificateVerifyError, None),
1050 (lambda: curl_cffi.requests.errors.RequestsError(
1051 '', code=curl_cffi.const.CurlECode.SSL_CONNECT_ERROR), SSLError, None),
1052 (lambda: curl_cffi.requests.errors.RequestsError(
1053 '', code=curl_cffi.const.CurlECode.TOO_MANY_REDIRECTS), HTTPError, None),
1054 (lambda: curl_cffi.requests.errors.RequestsError(
1055 '', code=curl_cffi.const.CurlECode.PROXY), ProxyError, None),
1057 def test_request_error_mapping(self, handler, monkeypatch, raised, expected, match):
1058 import curl_cffi.requests
1059 curl_res = curl_cffi.requests.Response()
1060 curl_res.status_code = 301
1062 with handler() as rh:
1063 original_get_instance = rh._get_instance
1065 def mock_get_instance(*args, **kwargs):
1066 instance = original_get_instance(*args, **kwargs)
1068 def request(*_, **__):
1069 try:
1070 raise raised()
1071 except Exception as e:
1072 e.response = curl_res
1073 raise
1074 monkeypatch.setattr(instance, 'request', request)
1075 return instance
1077 monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
1079 with pytest.raises(expected) as exc_info:
1080 rh.send(Request('http://fake'))
1082 assert exc_info.type is expected
1084 def test_response_reader(self, handler):
1085 class FakeResponse:
1086 def __init__(self, raise_error=False):
1087 self.raise_error = raise_error
1088 self.closed = False
1090 def iter_content(self):
1091 yield b'foo'
1092 yield b'bar'
1093 yield b'z'
1094 if self.raise_error:
1095 raise Exception('test')
1097 def close(self):
1098 self.closed = True
1100 from yt_dlp.networking._curlcffi import CurlCFFIResponseReader
1102 res = CurlCFFIResponseReader(FakeResponse())
1103 assert res.readable
1104 assert res.bytes_read == 0
1105 assert res.read(1) == b'f'
1106 assert res.bytes_read == 3
1107 assert res._buffer == b'oo'
1109 assert res.read(2) == b'oo'
1110 assert res.bytes_read == 3
1111 assert res._buffer == b''
1113 assert res.read(2) == b'ba'
1114 assert res.bytes_read == 6
1115 assert res._buffer == b'r'
1117 assert res.read(3) == b'rz'
1118 assert res.bytes_read == 7
1119 assert res._buffer == b''
1120 assert res.closed
1121 assert res._response.closed
1123 # should handle no size param
1124 res2 = CurlCFFIResponseReader(FakeResponse())
1125 assert res2.read() == b'foobarz'
1126 assert res2.bytes_read == 7
1127 assert res2._buffer == b''
1128 assert res2.closed
1130 # should close on an exception
1131 res3 = CurlCFFIResponseReader(FakeResponse(raise_error=True))
1132 with pytest.raises(Exception, match='test'):
1133 res3.read()
1134 assert res3._buffer == b''
1135 assert res3.bytes_read == 7
1136 assert res3.closed
1138 # buffer should be cleared on close
1139 res4 = CurlCFFIResponseReader(FakeResponse())
1140 res4.read(2)
1141 assert res4._buffer == b'o'
1142 res4.close()
1143 assert res4.closed
1144 assert res4._buffer == b''
1147 def run_validation(handler, error, req, **handler_kwargs):
1148 with handler(**handler_kwargs) as rh:
1149 if error:
1150 with pytest.raises(error):
1151 rh.validate(req)
1152 else:
1153 rh.validate(req)
1156 class TestRequestHandlerValidation:
1158 class ValidationRH(RequestHandler):
1159 def _send(self, request):
1160 raise RequestError('test')
1162 class NoCheckRH(ValidationRH):
1163 _SUPPORTED_FEATURES = None
1164 _SUPPORTED_PROXY_SCHEMES = None
1165 _SUPPORTED_URL_SCHEMES = None
1167 def _check_extensions(self, extensions):
1168 extensions.clear()
1170 class HTTPSupportedRH(ValidationRH):
1171 _SUPPORTED_URL_SCHEMES = ('http',)
1173 URL_SCHEME_TESTS = [
1174 # scheme, expected to fail, handler kwargs
1175 ('Urllib', [
1176 ('http', False, {}),
1177 ('https', False, {}),
1178 ('data', False, {}),
1179 ('ftp', False, {}),
1180 ('file', UnsupportedRequest, {}),
1181 ('file', False, {'enable_file_urls': True}),
1183 ('Requests', [
1184 ('http', False, {}),
1185 ('https', False, {}),
1187 ('Websockets', [
1188 ('ws', False, {}),
1189 ('wss', False, {}),
1191 ('CurlCFFI', [
1192 ('http', False, {}),
1193 ('https', False, {}),
1195 (NoCheckRH, [('http', False, {})]),
1196 (ValidationRH, [('http', UnsupportedRequest, {})]),
1199 PROXY_SCHEME_TESTS = [
1200 # proxy scheme, expected to fail
1201 ('Urllib', 'http', [
1202 ('http', False),
1203 ('https', UnsupportedRequest),
1204 ('socks4', False),
1205 ('socks4a', False),
1206 ('socks5', False),
1207 ('socks5h', False),
1208 ('socks', UnsupportedRequest),
1210 ('Requests', 'http', [
1211 ('http', False),
1212 ('https', False),
1213 ('socks4', False),
1214 ('socks4a', False),
1215 ('socks5', False),
1216 ('socks5h', False),
1218 ('CurlCFFI', 'http', [
1219 ('http', False),
1220 ('https', False),
1221 ('socks4', False),
1222 ('socks4a', False),
1223 ('socks5', False),
1224 ('socks5h', False),
1226 ('Websockets', 'ws', [
1227 ('http', UnsupportedRequest),
1228 ('https', UnsupportedRequest),
1229 ('socks4', False),
1230 ('socks4a', False),
1231 ('socks5', False),
1232 ('socks5h', False),
1234 (NoCheckRH, 'http', [('http', False)]),
1235 (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
1236 (NoCheckRH, 'http', [('http', False)]),
1237 (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
1240 PROXY_KEY_TESTS = [
1241 # proxy key, proxy scheme, expected to fail
1242 ('Urllib', 'http', [
1243 ('all', 'http', False),
1244 ('unrelated', 'http', False),
1246 ('Requests', 'http', [
1247 ('all', 'http', False),
1248 ('unrelated', 'http', False),
1250 ('CurlCFFI', 'http', [
1251 ('all', 'http', False),
1252 ('unrelated', 'http', False),
1254 ('Websockets', 'ws', [
1255 ('all', 'socks5', False),
1256 ('unrelated', 'socks5', False),
1258 (NoCheckRH, 'http', [('all', 'http', False)]),
1259 (HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]),
1260 (HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]),
1263 EXTENSION_TESTS = [
1264 ('Urllib', 'http', [
1265 ({'cookiejar': 'notacookiejar'}, AssertionError),
1266 ({'cookiejar': YoutubeDLCookieJar()}, False),
1267 ({'cookiejar': CookieJar()}, AssertionError),
1268 ({'timeout': 1}, False),
1269 ({'timeout': 'notatimeout'}, AssertionError),
1270 ({'unsupported': 'value'}, UnsupportedRequest),
1271 ({'legacy_ssl': False}, False),
1272 ({'legacy_ssl': True}, False),
1273 ({'legacy_ssl': 'notabool'}, AssertionError),
1275 ('Requests', 'http', [
1276 ({'cookiejar': 'notacookiejar'}, AssertionError),
1277 ({'cookiejar': YoutubeDLCookieJar()}, False),
1278 ({'timeout': 1}, False),
1279 ({'timeout': 'notatimeout'}, AssertionError),
1280 ({'unsupported': 'value'}, UnsupportedRequest),
1281 ({'legacy_ssl': False}, False),
1282 ({'legacy_ssl': True}, False),
1283 ({'legacy_ssl': 'notabool'}, AssertionError),
1285 ('CurlCFFI', 'http', [
1286 ({'cookiejar': 'notacookiejar'}, AssertionError),
1287 ({'cookiejar': YoutubeDLCookieJar()}, False),
1288 ({'timeout': 1}, False),
1289 ({'timeout': 'notatimeout'}, AssertionError),
1290 ({'unsupported': 'value'}, UnsupportedRequest),
1291 ({'impersonate': ImpersonateTarget('badtarget', None, None, None)}, UnsupportedRequest),
1292 ({'impersonate': 123}, AssertionError),
1293 ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False),
1294 ({'impersonate': ImpersonateTarget(None, None, None, None)}, False),
1295 ({'impersonate': ImpersonateTarget()}, False),
1296 ({'impersonate': 'chrome'}, AssertionError),
1297 ({'legacy_ssl': False}, False),
1298 ({'legacy_ssl': True}, False),
1299 ({'legacy_ssl': 'notabool'}, AssertionError),
1301 (NoCheckRH, 'http', [
1302 ({'cookiejar': 'notacookiejar'}, False),
1303 ({'somerandom': 'test'}, False), # but any extension is allowed through
1305 ('Websockets', 'ws', [
1306 ({'cookiejar': YoutubeDLCookieJar()}, False),
1307 ({'timeout': 2}, False),
1308 ({'legacy_ssl': False}, False),
1309 ({'legacy_ssl': True}, False),
1310 ({'legacy_ssl': 'notabool'}, AssertionError),
1314 @pytest.mark.parametrize('handler,fail,scheme', [
1315 ('Urllib', False, 'http'),
1316 ('Requests', False, 'http'),
1317 ('CurlCFFI', False, 'http'),
1318 ('Websockets', False, 'ws'),
1319 ], indirect=['handler'])
1320 def test_no_proxy(self, handler, fail, scheme):
1321 run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'}))
1322 run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'})
1324 @pytest.mark.parametrize('handler,scheme', [
1325 ('Urllib', 'http'),
1326 (HTTPSupportedRH, 'http'),
1327 ('Requests', 'http'),
1328 ('CurlCFFI', 'http'),
1329 ('Websockets', 'ws'),
1330 ], indirect=['handler'])
1331 def test_empty_proxy(self, handler, scheme):
1332 run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None}))
1333 run_validation(handler, False, Request(f'{scheme}://'), proxies={scheme: None})
1335 @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
1336 @pytest.mark.parametrize('handler,scheme', [
1337 ('Urllib', 'http'),
1338 (HTTPSupportedRH, 'http'),
1339 ('Requests', 'http'),
1340 ('CurlCFFI', 'http'),
1341 ('Websockets', 'ws'),
1342 ], indirect=['handler'])
1343 def test_invalid_proxy_url(self, handler, scheme, proxy_url):
1344 run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url}))
1346 @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
1347 (handler_tests[0], scheme, fail, handler_kwargs)
1348 for handler_tests in URL_SCHEME_TESTS
1349 for scheme, fail, handler_kwargs in handler_tests[1]
1350 ], indirect=['handler'])
1351 def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
1352 run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
1354 @pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [
1355 (handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail)
1356 for handler_tests in PROXY_KEY_TESTS
1357 for proxy_key, proxy_scheme, fail in handler_tests[2]
1358 ], indirect=['handler'])
1359 def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail):
1360 run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'}))
1361 run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'})
1363 @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
1364 (handler_tests[0], handler_tests[1], scheme, fail)
1365 for handler_tests in PROXY_SCHEME_TESTS
1366 for scheme, fail in handler_tests[2]
1367 ], indirect=['handler'])
1368 def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
1369 run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
1370 run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
1372 @pytest.mark.parametrize('handler,scheme,extensions,fail', [
1373 (handler_tests[0], handler_tests[1], extensions, fail)
1374 for handler_tests in EXTENSION_TESTS
1375 for extensions, fail in handler_tests[2]
1376 ], indirect=['handler'])
1377 def test_extension(self, handler, scheme, extensions, fail):
1378 run_validation(
1379 handler, fail, Request(f'{scheme}://', extensions=extensions))
1381 def test_invalid_request_type(self):
1382 rh = self.ValidationRH(logger=FakeLogger())
1383 for method in (rh.validate, rh.send):
1384 with pytest.raises(TypeError, match='Expected an instance of Request'):
1385 method('not a request')
1388 class FakeResponse(Response):
1389 def __init__(self, request):
1390 # XXX: we could make request part of standard response interface
1391 self.request = request
1392 super().__init__(fp=io.BytesIO(b''), headers={}, url=request.url)
1395 class FakeRH(RequestHandler):
1397 def __init__(self, *args, **params):
1398 self.params = params
1399 super().__init__(*args, **params)
1401 def _validate(self, request):
1402 return
1404 def _send(self, request: Request):
1405 if request.url.startswith('ssl://'):
1406 raise SSLError(request.url[len('ssl://'):])
1407 return FakeResponse(request)
1410 class FakeRHYDL(FakeYDL):
1411 def __init__(self, *args, **kwargs):
1412 super().__init__(*args, **kwargs)
1413 self._request_director = self.build_request_director([FakeRH])
1416 class AllUnsupportedRHYDL(FakeYDL):
1418 def __init__(self, *args, **kwargs):
1420 class UnsupportedRH(RequestHandler):
1421 def _send(self, request: Request):
1422 pass
1424 _SUPPORTED_FEATURES = ()
1425 _SUPPORTED_PROXY_SCHEMES = ()
1426 _SUPPORTED_URL_SCHEMES = ()
1428 super().__init__(*args, **kwargs)
1429 self._request_director = self.build_request_director([UnsupportedRH])
1432 class TestRequestDirector:
1434 def test_handler_operations(self):
1435 director = RequestDirector(logger=FakeLogger())
1436 handler = FakeRH(logger=FakeLogger())
1437 director.add_handler(handler)
1438 assert director.handlers.get(FakeRH.RH_KEY) is handler
1440 # Handler should overwrite
1441 handler2 = FakeRH(logger=FakeLogger())
1442 director.add_handler(handler2)
1443 assert director.handlers.get(FakeRH.RH_KEY) is not handler
1444 assert director.handlers.get(FakeRH.RH_KEY) is handler2
1445 assert len(director.handlers) == 1
1447 class AnotherFakeRH(FakeRH):
1448 pass
1449 director.add_handler(AnotherFakeRH(logger=FakeLogger()))
1450 assert len(director.handlers) == 2
1451 assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY
1453 director.handlers.pop(FakeRH.RH_KEY, None)
1454 assert director.handlers.get(FakeRH.RH_KEY) is None
1455 assert len(director.handlers) == 1
1457 # RequestErrors should passthrough
1458 with pytest.raises(SSLError):
1459 director.send(Request('ssl://something'))
1461 def test_send(self):
1462 director = RequestDirector(logger=FakeLogger())
1463 with pytest.raises(RequestError):
1464 director.send(Request('any://'))
1465 director.add_handler(FakeRH(logger=FakeLogger()))
1466 assert isinstance(director.send(Request('http://')), FakeResponse)
1468 def test_unsupported_handlers(self):
1469 class SupportedRH(RequestHandler):
1470 _SUPPORTED_URL_SCHEMES = ['http']
1472 def _send(self, request: Request):
1473 return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
1475 director = RequestDirector(logger=FakeLogger())
1476 director.add_handler(SupportedRH(logger=FakeLogger()))
1477 director.add_handler(FakeRH(logger=FakeLogger()))
1479 # First should take preference
1480 assert director.send(Request('http://')).read() == b'supported'
1481 assert director.send(Request('any://')).read() == b''
1483 director.handlers.pop(FakeRH.RH_KEY)
1484 with pytest.raises(NoSupportingHandlers):
1485 director.send(Request('any://'))
1487 def test_unexpected_error(self):
1488 director = RequestDirector(logger=FakeLogger())
1490 class UnexpectedRH(FakeRH):
1491 def _send(self, request: Request):
1492 raise TypeError('something')
1494 director.add_handler(UnexpectedRH(logger=FakeLogger()))
1495 with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
1496 director.send(Request('any://'))
1498 director.handlers.clear()
1499 assert len(director.handlers) == 0
1501 # An unexpected error in one handler should not be fatal when another handler supports the request
1502 director.add_handler(FakeRH(logger=FakeLogger()))
1503 director.add_handler(UnexpectedRH(logger=FakeLogger()))
1504 assert director.send(Request('any://'))
1506 def test_preference(self):
1507 director = RequestDirector(logger=FakeLogger())
1508 director.add_handler(FakeRH(logger=FakeLogger()))
1510 class SomeRH(RequestHandler):
1511 _SUPPORTED_URL_SCHEMES = ['http']
1513 def _send(self, request: Request):
1514 return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
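# Preference callback: SomeRH is preferred (+100) only when the request carries a 'prefer' header,
# otherwise it is penalised (-1); all other handlers score 0.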
1516 def some_preference(rh, request):
1517 return (0 if not isinstance(rh, SomeRH)
1518 else 100 if 'prefer' in request.headers
1519 else -1)
1521 director.add_handler(SomeRH(logger=FakeLogger()))
1522 director.preferences.add(some_preference)
1524 assert director.send(Request('http://')).read() == b''
1525 assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
1527 def test_close(self, monkeypatch):
1528 director = RequestDirector(logger=FakeLogger())
1529 director.add_handler(FakeRH(logger=FakeLogger()))
1530 called = False
1532 def mock_close(*args, **kwargs):
1533 nonlocal called
1534 called = True
1536 monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', mock_close)
1537 director.close()
1538 assert called
1541 # XXX: do we want to move this to test_YoutubeDL.py?
1542 class TestYoutubeDLNetworking:
1544 @staticmethod
1545 def build_handler(ydl, handler: RequestHandler = FakeRH):
1546 return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)
1548 def test_compat_opener(self):
1549 with FakeYDL() as ydl:
1550 with warnings.catch_warnings():
1551 warnings.simplefilter('ignore', category=DeprecationWarning)
1552 assert isinstance(ydl._opener, urllib.request.OpenerDirector)
1554 @pytest.mark.parametrize('proxy,expected', [
1555 ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
1556 ('', {'all': '__noproxy__'}),
1557 (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}), # falls back to env; also applied to https
1558 ])
1559 def test_proxy(self, proxy, expected, monkeypatch):
1560 monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081')
1561 with FakeYDL({'proxy': proxy}) as ydl:
1562 assert ydl.proxies == expected
1564 def test_compat_request(self):
1565 with FakeRHYDL() as ydl:
1566 assert ydl.urlopen('test://')
1567 urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
1568 urllib_req.add_unredirected_header('Cookie', 'bob=bob')
1569 urllib_req.timeout = 2
1570 with warnings.catch_warnings():
1571 warnings.simplefilter('ignore', category=DeprecationWarning)
1572 req = ydl.urlopen(urllib_req).request
1573 assert req.url == urllib_req.get_full_url()
1574 assert req.data == urllib_req.data
1575 assert req.method == urllib_req.get_method()
1576 assert 'X-Test' in req.headers
1577 assert 'Cookie' in req.headers
1578 assert req.extensions.get('timeout') == 2
1580 with pytest.raises(AssertionError):
1581 ydl.urlopen(None)
1583 def test_extract_basic_auth(self):
1584 with FakeRHYDL() as ydl:
1585 res = ydl.urlopen(Request('http://user:pass@foo.bar'))
1586 assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'
1588 def test_sanitize_url(self):
1589 with FakeRHYDL() as ydl:
1590 res = ydl.urlopen(Request('httpss://foo.bar'))
1591 assert res.request.url == 'https://foo.bar'
1593 def test_file_urls_error(self):
1594 # use urllib handler
1595 with FakeYDL() as ydl:
1596 with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
1597 ydl.urlopen('file://')
1599 @pytest.mark.parametrize('scheme', ['ws', 'wss'])
1600 def test_websocket_unavailable_error(self, scheme):
1601 with AllUnsupportedRHYDL() as ydl:
1602 with pytest.raises(RequestError, match=r'This request requires WebSocket support'):
1603 ydl.urlopen(f'{scheme}://')
1605 def test_legacy_server_connect_error(self):
1606 with FakeRHYDL() as ydl:
1607 for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
1608 with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
1609 ydl.urlopen(f'ssl://{error}')
1611 with pytest.raises(SSLError, match='testerror'):
1612 ydl.urlopen('ssl://testerror')
1614 def test_unsupported_impersonate_target(self):
1615 class FakeImpersonationRHYDL(FakeYDL):
1616 def __init__(self, *args, **kwargs):
1617 class HTTPRH(RequestHandler):
1618 def _send(self, request: Request):
1619 pass
1620 _SUPPORTED_URL_SCHEMES = ('http',)
1621 _SUPPORTED_PROXY_SCHEMES = None
1623 super().__init__(*args, **kwargs)
1624 self._request_director = self.build_request_director([HTTPRH])
1626 with FakeImpersonationRHYDL() as ydl:
1627 with pytest.raises(
1628 RequestError,
1629 match=r'Impersonate target "test" is not available',
1630 ):
1631 ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)}))
1633 def test_unsupported_impersonate_extension(self):
1634 class FakeHTTPRHYDL(FakeYDL):
1635 def __init__(self, *args, **kwargs):
1636 class IRH(ImpersonateRequestHandler):
1637 def _send(self, request: Request):
1638 pass
1640 _SUPPORTED_URL_SCHEMES = ('http',)
1641 _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'}
1642 _SUPPORTED_PROXY_SCHEMES = None
1644 super().__init__(*args, **kwargs)
1645 self._request_director = self.build_request_director([IRH])
1647 with FakeHTTPRHYDL() as ydl:
1648 with pytest.raises(
1649 RequestError,
1650 match=r'Impersonate target "test" is not available',
1651 ):
1652 ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)}))
1654 def test_raise_impersonate_error(self):
1655 with pytest.raises(
1656 YoutubeDLError,
1657 match=r'Impersonate target "test" is not available',
1658 ):
1659 FakeYDL({'impersonate': ImpersonateTarget('test', None, None, None)})
1661 def test_pass_impersonate_param(self, monkeypatch):
1663 class IRH(ImpersonateRequestHandler):
1664 def _send(self, request: Request):
1665 pass
1667 _SUPPORTED_URL_SCHEMES = ('http',)
1668 _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'}
1670 # Bypass the impersonate-target availability check performed on initialization
1671 brh = FakeYDL.build_request_director
1672 monkeypatch.setattr(FakeYDL, 'build_request_director', lambda cls, handlers, preferences=None: brh(cls, handlers=[IRH]))
1674 with FakeYDL({
1675 'impersonate': ImpersonateTarget('abc', None, None, None),
1676 }) as ydl:
1677 rh = self.build_handler(ydl, IRH)
1678 assert rh.impersonate == ImpersonateTarget('abc', None, None, None)
1680 def test_get_impersonate_targets(self):
1681 handlers = []
1682 for target_client in ('abc', 'xyz', 'asd'):
1683 class TestRH(ImpersonateRequestHandler):
1684 def _send(self, request: Request):
1685 pass
1686 _SUPPORTED_URL_SCHEMES = ('http',)
1687 _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client): 'test'}
1688 RH_KEY = target_client
1689 RH_NAME = target_client
1690 handlers.append(TestRH)
1692 with FakeYDL() as ydl:
1693 ydl._request_director = ydl.build_request_director(handlers)
1694 assert set(ydl._get_available_impersonate_targets()) == {
1695 (ImpersonateTarget('xyz'), 'xyz'),
1696 (ImpersonateTarget('abc'), 'abc'),
1697 (ImpersonateTarget('asd'), 'asd'),
1698 }
1699 assert ydl._impersonate_target_available(ImpersonateTarget('abc'))
1700 assert ydl._impersonate_target_available(ImpersonateTarget())
1701 assert not ydl._impersonate_target_available(ImpersonateTarget('zxy'))
1703 @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
1704 ('http', '__noproxy__', None),
1705 ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
1706 ('https', 'example.com', 'http://example.com'),
1707 ('https', '//example.com', 'http://example.com'),
1708 ('https', 'socks5://example.com', 'socks5h://example.com'),
1709 ('http', 'socks://example.com', 'socks4://example.com'),
1710 ('http', 'socks4://example.com', 'socks4://example.com'),
1711 ('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies
1712 ])
1713 def test_clean_proxy(self, proxy_key, proxy_url, expected, monkeypatch):
1714 # proxies should be cleaned in urlopen()
1715 with FakeRHYDL() as ydl:
1716 req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
1717 assert req.proxies[proxy_key] == expected
1719 # and should also be cleaned when building the handler
1720 monkeypatch.setenv(f'{proxy_key.upper()}_PROXY', proxy_url)
1721 with FakeYDL() as ydl:
1722 rh = self.build_handler(ydl)
1723 assert rh.proxies[proxy_key] == expected
1725 def test_clean_proxy_header(self):
1726 with FakeRHYDL() as ydl:
1727 req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
1728 assert 'ytdl-request-proxy' not in req.headers
1729 assert req.proxies == {'all': 'http://foo.bar'}
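# The same header is stripped and converted to a proxy when passed via http_headers at handler build time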
1731 with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
1732 rh = self.build_handler(ydl)
1733 assert 'ytdl-request-proxy' not in rh.headers
1734 assert rh.proxies == {'all': 'http://foo.bar'}
1736 def test_clean_header(self):
1737 with FakeRHYDL() as ydl:
1738 res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
1739 assert 'Youtubedl-no-compression' not in res.request.headers
1740 assert res.request.headers.get('Accept-Encoding') == 'identity'
1742 with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
1743 rh = self.build_handler(ydl)
1744 assert 'Youtubedl-no-compression' not in rh.headers
1745 assert rh.headers.get('Accept-Encoding') == 'identity'
1747 with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
1748 rh = self.build_handler(ydl)
1749 assert 'Ytdl-socks-proxy' not in rh.headers
1751 def test_build_handler_params(self):
1752 with FakeYDL({
1753 'http_headers': {'test': 'testtest'},
1754 'socket_timeout': 2,
1755 'proxy': 'http://127.0.0.1:8080',
1756 'source_address': '127.0.0.45',
1757 'debug_printtraffic': True,
1758 'compat_opts': ['no-certifi'],
1759 'nocheckcertificate': True,
1760 'legacyserverconnect': True,
1761 }) as ydl:
1762 rh = self.build_handler(ydl)
1763 assert rh.headers.get('test') == 'testtest'
1764 assert 'Accept' in rh.headers # ensure std_headers are still there
1765 assert rh.timeout == 2
1766 assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
1767 assert rh.source_address == '127.0.0.45'
1768 assert rh.verbose is True
1769 assert rh.prefer_system_certs is True
1770 assert rh.verify is False
1771 assert rh.legacy_ssl_support is True
1773 @pytest.mark.parametrize('ydl_params', [
1774 {'client_certificate': 'fakecert.crt'},
1775 {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
1776 {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
1777 {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
1778 ])
1779 def test_client_certificate(self, ydl_params):
1780 with FakeYDL(ydl_params) as ydl:
1781 rh = self.build_handler(ydl)
1782 assert rh._client_cert == ydl_params # XXX: Too bound to implementation
1784 def test_urllib_file_urls(self):
1785 with FakeYDL({'enable_file_urls': False}) as ydl:
1786 rh = self.build_handler(ydl, UrllibRH)
1787 assert rh.enable_file_urls is False
1789 with FakeYDL({'enable_file_urls': True}) as ydl:
1790 rh = self.build_handler(ydl, UrllibRH)
1791 assert rh.enable_file_urls is True
1793 def test_compat_opt_prefer_urllib(self):
1794 # This assumes urllib only has a preference when this compat opt is given
1795 with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
1796 director = ydl.build_request_director([UrllibRH])
1797 assert len(director.preferences) == 1
1798 assert director.preferences.pop()(UrllibRH, None)
1801 class TestRequest:
1803 def test_query(self):
1804 req = Request('http://example.com?q=something', query={'v': 'xyz'})
1805 assert req.url == 'http://example.com?q=something&v=xyz'
1807 req.update(query={'v': '123'})
1808 assert req.url == 'http://example.com?q=something&v=123'
1809 req.update(url='http://example.com', query={'v': 'xyz'})
1810 assert req.url == 'http://example.com?v=xyz'
1812 def test_method(self):
1813 req = Request('http://example.com')
1814 assert req.method == 'GET'
1815 req.data = b'test'
1816 assert req.method == 'POST'
1817 req.data = None
1818 assert req.method == 'GET'
1819 req.data = b'test2'
1820 req.method = 'PUT'
1821 assert req.method == 'PUT'
1822 req.data = None
1823 assert req.method == 'PUT'
1824 with pytest.raises(TypeError):
1825 req.method = 1
1827 def test_request_helpers(self):
1828 assert HEADRequest('http://example.com').method == 'HEAD'
1829 assert PUTRequest('http://example.com').method == 'PUT'
1831 def test_headers(self):
1832 req = Request('http://example.com', headers={'tesT': 'test'})
1833 assert req.headers == HTTPHeaderDict({'test': 'test'})
1834 req.update(headers={'teSt2': 'test2'})
1835 assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})
1837 req.headers = new_headers = HTTPHeaderDict({'test': 'test'})
1838 assert req.headers == HTTPHeaderDict({'test': 'test'})
1839 assert req.headers is new_headers
1842 # assigning a plain dict converts it to a case-insensitive HTTPHeaderDict
1842 req.headers = new_headers = {'test2': 'test2'}
1843 assert isinstance(req.headers, HTTPHeaderDict)
1844 assert req.headers is not new_headers
1846 with pytest.raises(TypeError):
1847 req.headers = None
1849 def test_data_type(self):
1850 req = Request('http://example.com')
1851 assert req.data is None
1852 # test bytes is allowed
1853 req.data = b'test'
1854 assert req.data == b'test'
1855 # test iterable of bytes is allowed
1856 i = [b'test', b'test2']
1857 req.data = i
1858 assert req.data == i
1860 # test file-like object is allowed
1861 f = io.BytesIO(b'test')
1862 req.data = f
1863 assert req.data == f
1866 # common mistake: str is not allowed
1866 with pytest.raises(TypeError):
1867 req.data = 'test'
1868 assert req.data != 'test'
1871 # common mistake: dict is not allowed
1871 with pytest.raises(TypeError):
1872 req.data = {'test': 'test'}
1873 assert req.data != {'test': 'test'}
1875 def test_content_length_header(self):
1876 req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
1877 assert req.headers.get('Content-Length') == '0'
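# Replacing the data discards the previously set Content-Length header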
1879 req.data = b'test'
1880 assert 'Content-Length' not in req.headers
1882 req = Request('http://example.com', headers={'Content-Length': '10'})
1883 assert 'Content-Length' not in req.headers
1885 def test_content_type_header(self):
1886 req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
1887 assert req.headers.get('Content-Type') == 'test'
1888 req.data = b'test2'
1889 assert req.headers.get('Content-Type') == 'test'
1890 req.data = None
1891 assert 'Content-Type' not in req.headers
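# Setting data again without an explicit Content-Type applies the form-urlencoded default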
1892 req.data = b'test3'
1893 assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'
1895 def test_update_req(self):
1896 req = Request('http://example.com')
1897 assert req.data is None
1898 assert req.method == 'GET'
1899 assert 'Content-Type' not in req.headers
1900 # Test that zero-byte payloads will be sent
1901 req.update(data=b'')
1902 assert req.data == b''
1903 assert req.method == 'POST'
1904 assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'
1906 def test_proxies(self):
1907 req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
1908 assert req.proxies == {'http': 'http://127.0.0.1:8080'}
1910 def test_extensions(self):
1911 req = Request(url='http://example.com', extensions={'timeout': 2})
1912 assert req.extensions == {'timeout': 2}
1914 def test_copy(self):
1915 req = Request(
1916 url='http://example.com',
1917 extensions={'cookiejar': CookieJar()},
1918 headers={'Accept-Encoding': 'br'},
1919 proxies={'http': 'http://127.0.0.1'},
1920 data=[b'123'],
1921 )
1922 req_copy = req.copy()
1923 assert req_copy is not req
1924 assert req_copy.url == req.url
1925 assert req_copy.headers == req.headers
1926 assert req_copy.headers is not req.headers
1927 assert req_copy.proxies == req.proxies
1928 assert req_copy.proxies is not req.proxies
1931 # Data is not copied; the copy shares the same data object
1931 assert req_copy.data == req.data
1932 assert req_copy.data is req.data
1934 # Shallow copy extensions
1935 assert req_copy.extensions is not req.extensions
1936 assert req_copy.extensions['cookiejar'] == req.extensions['cookiejar']
1938 # Subclasses are copied by default
1939 class AnotherRequest(Request):
1940 pass
1942 req = AnotherRequest(url='http://127.0.0.1')
1943 assert isinstance(req.copy(), AnotherRequest)
1945 def test_url(self):
1946 req = Request(url='https://фtest.example.com/ some spaceв?ä=c')
1947 assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'
1949 assert Request(url='//example.com').url == 'http://example.com'
1951 with pytest.raises(TypeError):
1952 Request(url='https://').url = None
1955 class TestResponse:
1957 @pytest.mark.parametrize('reason,status,expected', [
1958 ('custom', 200, 'custom'),
1959 (None, 404, 'Not Found'), # fallback status
1960 ('', 403, 'Forbidden'),
1961 (None, 999, None),
1962 ])
1963 def test_reason(self, reason, status, expected):
1964 res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
1965 assert res.reason == expected
1967 def test_headers(self):
1968 headers = Message()
1969 headers.add_header('Test', 'test')
1970 headers.add_header('Test', 'test2')
1971 headers.add_header('content-encoding', 'br')
1972 res = Response(io.BytesIO(b''), headers=headers, url='test://')
1973 assert res.headers.get_all('test') == ['test', 'test2']
1974 assert 'Content-Encoding' in res.headers
1976 def test_get_header(self):
1977 headers = Message()
1978 headers.add_header('Set-Cookie', 'cookie1')
1979 headers.add_header('Set-cookie', 'cookie2')
1980 headers.add_header('Test', 'test')
1981 headers.add_header('Test', 'test2')
1982 res = Response(io.BytesIO(b''), headers=headers, url='test://')
1983 assert res.get_header('test') == 'test, test2'
1984 assert res.get_header('set-Cookie') == 'cookie1'
1985 assert res.get_header('notexist', 'default') == 'default'
1987 def test_compat(self):
1988 res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
1989 with warnings.catch_warnings():
1990 warnings.simplefilter('ignore', category=DeprecationWarning)
1991 assert res.code == res.getcode() == res.status
1992 assert res.geturl() == res.url
1993 assert res.info() is res.headers
1994 assert res.getheader('test') == res.get_header('test')
1997 class TestImpersonateTarget:
1998 @pytest.mark.parametrize('target_str,expected', [
1999 ('abc', ImpersonateTarget('abc', None, None, None)),
2000 ('abc-120_esr', ImpersonateTarget('abc', '120_esr', None, None)),
2001 ('abc-120:xyz', ImpersonateTarget('abc', '120', 'xyz', None)),
2002 ('abc-120:xyz-5.6', ImpersonateTarget('abc', '120', 'xyz', '5.6')),
2003 ('abc:xyz', ImpersonateTarget('abc', None, 'xyz', None)),
2004 ('abc:', ImpersonateTarget('abc', None, None, None)),
2005 ('abc-120:', ImpersonateTarget('abc', '120', None, None)),
2006 (':xyz', ImpersonateTarget(None, None, 'xyz', None)),
2007 (':xyz-6.5', ImpersonateTarget(None, None, 'xyz', '6.5')),
2008 (':', ImpersonateTarget(None, None, None, None)),
2009 ('', ImpersonateTarget(None, None, None, None)),
2010 ])
2011 def test_target_from_str(self, target_str, expected):
2012 assert ImpersonateTarget.from_str(target_str) == expected
2014 @pytest.mark.parametrize('target_str', [
2015 '-120', ':-12.0', '-12:-12', '-:-',
2016 '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:',
2017 ])
2018 def test_target_from_invalid_str(self, target_str):
2019 with pytest.raises(ValueError):
2020 ImpersonateTarget.from_str(target_str)
2022 @pytest.mark.parametrize('target,expected', [
2023 (ImpersonateTarget('abc', None, None, None), 'abc'),
2024 (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
2025 (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
2026 (ImpersonateTarget('abc', '120', 'xyz', '5'), 'abc-120:xyz-5'),
2027 (ImpersonateTarget('abc', None, 'xyz', None), 'abc:xyz'),
2028 (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
2029 (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
2030 (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'),
2031 (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'),
2032 (ImpersonateTarget('abc'), 'abc'),
2033 (ImpersonateTarget(None, None, None, None), ''),
2034 ])
2035 def test_str(self, target, expected):
2036 assert str(target) == expected
2038 @pytest.mark.parametrize('args', [
2039 ('abc', None, None, '5'),
2040 ('abc', '120', None, '5'),
2041 (None, '120', None, None),
2042 (None, '120', None, '5'),
2043 (None, None, None, '5'),
2044 (None, '120', 'xyz', '5'),
2045 ])
2046 def test_invalid_impersonate_target(self, args):
2047 with pytest.raises(ValueError):
2048 ImpersonateTarget(*args)
2050 @pytest.mark.parametrize('target1,target2,is_in,is_eq', [
2051 (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', None, None, None), True, True),
2052 (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', '120', None, None), True, False),
2053 (ImpersonateTarget('abc', None, 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', None), True, False),
2054 (ImpersonateTarget('abc', '121', 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', 'test'), False, False),
2055 (ImpersonateTarget('abc'), ImpersonateTarget('abc', '120', 'xyz', 'test'), True, False),
2056 (ImpersonateTarget('abc', '120', 'xyz', 'test'), ImpersonateTarget('abc'), True, False),
2057 (ImpersonateTarget(), ImpersonateTarget('abc', '120', 'xyz'), True, False),
2058 (ImpersonateTarget(), ImpersonateTarget(), True, True),
2059 ])
2060 def test_impersonate_target_in(self, target1, target2, is_in, is_eq):
2061 assert (target1 in target2) is is_in
2062 assert (target1 == target2) is is_eq