import collections
import random
import urllib.parse
import urllib.request

from ._utils import remove_start


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # NB: abridged here; the full source enumerates a long tuple of Chrome release strings.
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


class HTTPHeaderDict(collections.UserDict, dict):
    """
    Store and access keys case-insensitively.
    The constructor can take multiple dicts, in which keys in the latter are prioritised.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        for dct in args:
            if dct is not None:
                self.update(dct)
        self.update(kwargs)

    def __setitem__(self, key, value):
        if isinstance(value, bytes):
            value = value.decode('latin-1')
        super().__setitem__(key.title(), str(value).strip())

    def __getitem__(self, key):
        return super().__getitem__(key.title())

    def __delitem__(self, key):
        super().__delitem__(key.title())

    def __contains__(self, key):
        return super().__contains__(key.title() if isinstance(key, str) else key)
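
# Illustrative usage (an added example, not in the original module): keys are
# normalised with str.title() on every access, so lookups are case-insensitive.
#
#   headers = HTTPHeaderDict({'user-agent': 'curl/8.0'}, {'ACCEPT': '*/*'})
#   assert headers['USER-AGENT'] == 'curl/8.0'  # 'curl/8.0' is a made-up value
#   assert 'accept' in headers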


std_headers = HTTPHeaderDict({
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
})


def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
    req_proxy = headers.pop('Ytdl-Request-Proxy', None)
    if req_proxy:
        proxies.clear()  # XXX: compat: Ytdl-Request-Proxy takes preference over everything, including NO_PROXY
        proxies['all'] = req_proxy
    for proxy_key, proxy_url in proxies.items():
        if proxy_url == '__noproxy__':
            proxies[proxy_key] = None
            continue
        if proxy_key == 'no':  # special case
            continue
        if proxy_url is not None:
            # Ensure proxies without a scheme are http.
            try:
                proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
            except ValueError:
                # Ignore invalid proxy URLs. Sometimes these may be introduced through environment
                # variables unrelated to proxy settings - e.g. Colab `COLAB_LANGUAGE_SERVER_PROXY`.
                # If the proxy is going to be used, the Request Handler proxy validation will handle it.
                continue
            if proxy_scheme is None:
                proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')

            replace_scheme = {
                'socks5': 'socks5h',  # compat: socks5 was treated as socks5h
                'socks': 'socks4',  # compat: non-standard
            }
            if proxy_scheme in replace_scheme:
                proxies[proxy_key] = urllib.parse.urlunparse(
                    urllib.parse.urlparse(proxy_url)._replace(scheme=replace_scheme[proxy_scheme]))


def clean_headers(headers: HTTPHeaderDict):
    if 'Youtubedl-No-Compression' in headers:  # compat
        del headers['Youtubedl-No-Compression']
        headers['Accept-Encoding'] = 'identity'
    headers.pop('Ytdl-socks-proxy', None)
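
# Illustrative behaviour (an added example, not in the original module):
#
#   h = HTTPHeaderDict({'Youtubedl-No-Compression': '1', 'Ytdl-Socks-Proxy': 'x'})
#   clean_headers(h)
#   # h == {'Accept-Encoding': 'identity'}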


def remove_dot_segments(path):
    # Implements RFC 3986 section 5.2.4 remove_dot_segments
    # Pseudo-code: https://tools.ietf.org/html/rfc3986#section-5.2.4
    # https://github.com/urllib3/urllib3/blob/ba49f5c4e19e6bca6827282feb77a3c9f937e64b/src/urllib3/util/url.py#L263
    output = []
    segments = path.split('/')
    for s in segments:
        if s == '.':
            continue
        elif s == '..':
            if output:
                output.pop()
        else:
            output.append(s)
    if not segments[0] and (not output or output[0]):
        output.insert(0, '')
    if segments[-1] in ('.', '..'):
        output.append('')
    return '/'.join(output)
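
# Illustrative behaviour (an added example, not in the original module), using
# the sample path from RFC 3986 section 5.2.4:
#
#   remove_dot_segments('/a/b/c/./../../g')  # -> '/a/g'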


def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")


def normalize_url(url):
    """Normalize URL as suggested by RFC 3986"""
    url_parsed = urllib.parse.urlparse(url)
    return url_parsed._replace(
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(remove_dot_segments(url_parsed.path)),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment),
    ).geturl()
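
# Illustrative behaviour (an added example, not in the original module):
#
#   normalize_url('http://example.com/a/b/../c d')  # -> 'http://example.com/a/c%20d'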