4 # Christophe DUMEZ (chris@qbittorrent.org)
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # * Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 # * Neither the name of the author nor the names of its contributors may be
15 # used to endorse or promote products derived from this software without
16 # specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 # POSSIBILITY OF SUCH DAMAGE.
42 from collections
.abc
import Mapping
43 from typing
import Any
, Optional
def getBrowserUserAgent() -> str:
    """ Disguise as browser to circumvent website blocking

    Builds a Firefox-on-Windows User-Agent string whose version number is
    estimated from today's date, so the advertised browser keeps moving
    forward over time instead of being frozen at a fixed release.

    Returns:
        The value to send in the ``User-Agent`` request header.
    """

    # Firefox release calendar
    # https://whattrainisitnow.com/calendar/
    # https://wiki.mozilla.org/index.php?title=Release_Management/Calendar&redirect=no

    # Reference point for the estimate: Firefox 125 was released on 2024-04-16.
    baseDate = datetime.date(2024, 4, 16)
    baseVersion = 125

    nowDate = datetime.date.today()
    # Approximate the cadence as one major version per 30 days. Clamp at zero
    # so a system clock set before the base date never advertises a version
    # older than the reference one.
    elapsedReleases = max((nowDate - baseDate).days // 30, 0)
    nowVersion = baseVersion + elapsedReleases

    return f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{nowVersion}.0) Gecko/20100101 Firefox/{nowVersion}.0"
# Default headers attached to the requests built by the functions below.
headers: dict[str, Any] = {'User-Agent': getBrowserUserAgent()}

# SOCKS5 Proxy support
# The proxy is taken from the "sock_proxy" environment variable, written as
# "[username:password@]host:port". Blank values are ignored.
if os.environ.get("sock_proxy", "").strip():
    proxy_str = os.environ["sock_proxy"].strip()
    m = re.match(r"^(?:(?P<username>[^:]+):(?P<password>[^@]+)@)?(?P<host>[^:]+):(?P<port>\w+)$",
                 proxy_str)
    # A value that does not match the expected shape yields no match object;
    # skip the proxy setup instead of failing on `None.group(...)` at import.
    if m is not None:
        # NOTE(review): the fourth positional argument (True) is presumably
        # PySocks' remote-DNS flag - confirm against the bundled socks module.
        socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, m.group('host'),
                              int(m.group('port')), True, m.group('username'), m.group('password'))
        # Replace the socket class process-wide so later connections made
        # through the socket module use the configured proxy.
        socket.socket = socks.socksocket  # type: ignore[misc]
def htmlentitydecode(s: str) -> str:
    """ Replace HTML character references in a string with their characters

    Three passes are applied in order: named entities, decimal references,
    then hexadecimal references. Because each pass works on the output of
    the previous one, text such as ``&amp;#233;`` ends up fully decoded.

    Args:
        s: Text that may contain HTML character references.

    Returns:
        The text with every recognised reference replaced. References that
        do not denote a valid code point are left untouched.
    """

    # First convert alpha entities (such as &eacute;)
    # (Inspired from http://mail.python.org/pipermail/python-list/2007-June/443813.html)
    def entity2char(m: re.Match[str]) -> str:
        entity = m.group(1)
        if entity in html.entities.name2codepoint:
            return chr(html.entities.name2codepoint[entity])
        return " "  # Unknown entity: We replace with a space.

    def codepoint2char(m: re.Match[str], base: int) -> str:
        # The patterns below accept digit runs that are not valid code points
        # (non-hex letters, values above U+10FFFF). Keep such references
        # verbatim rather than letting int()/chr() abort the whole decode.
        try:
            return chr(int(m.group(1), base))
        except (ValueError, OverflowError):
            return m.group(0)

    t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint), entity2char, s)

    # Then convert numerical entities (such as &#233;)
    t = re.sub(r'&#(\d+);', lambda x: codepoint2char(x, 10), t)

    # Then convert hexa entities (such as &#xE9;)
    return re.sub(r'&#x(\w+);', lambda x: codepoint2char(x, 16), t)
def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None) -> str:
    """ Return the content of the url page as a string

    Args:
        url: Address to fetch.
        custom_headers: Extra request headers; they override the module
            defaults when the same key is present in both.
        request_data: Optional request body handed to
            ``urllib.request.Request``.

    Returns:
        The response body decoded to text with HTML character references
        resolved, or an empty string when the request fails with a
        ``URLError`` (the reason is reported on stderr).
    """

    # custom_headers is only unpacked into a fresh dict, so the shared default
    # mapping is never mutated.
    request = urllib.request.Request(url, request_data, {**headers, **custom_headers})
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.URLError as errno:
        print(f"Connection error: {errno.reason}", file=sys.stderr)
        return ""

    # Read everything needed from the response, then release the connection.
    with response:
        data: bytes = response.read()
        # Parses the Content-Type parameters properly (quotes, trailing
        # parameters) and yields None when no charset is declared.
        charset = response.headers.get_content_charset() or 'utf-8'

    # Check if it is gzipped
    if data[:2] == b'\x1f\x8b':
        # Data is gzip encoded, decode it
        data = gzip.decompress(data)

    try:
        dataStr = data.decode(charset, 'replace')
    except LookupError:
        # The server announced an encoding Python does not know.
        dataStr = data.decode('utf-8', 'replace')

    return htmlentitydecode(dataStr)
def download_file(url: str, referer: Optional[str] = None) -> str:
    """ Download file at url and write it to a file, return the path to the file and the url

    Args:
        url: Address of the file to download.
        referer: Optional value for the ``referer`` request header.

    Returns:
        A single string of the form ``"<path> <url>"``, where ``<path>`` is
        the temporary file holding the downloaded bytes. The file is created
        with ``tempfile.mkstemp()`` and is not removed by this function.
    """

    # Download url
    request = urllib.request.Request(url, headers=headers)
    if referer is not None:
        request.add_header('referer', referer)
    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(request) as response:
        data = response.read()

    # Check if it is gzipped
    if data[:2] == b'\x1f\x8b':
        # Data is gzip encoded, decode it
        data = gzip.decompress(data)

    # Write it to a file
    fileHandle, path = tempfile.mkstemp()
    with os.fdopen(fileHandle, "wb") as file:
        file.write(data)

    # return file path
    return f"{path} {url}"