Sync translations from Transifex and run lupdate
[qBittorrent.git] / src / searchengine / nova3 / helpers.py
blob2633c0eeaa4839f33966d06883ae0cf556486f11
1 #VERSION: 1.43
3 # Author:
4 # Christophe DUMEZ (chris@qbittorrent.org)
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # * Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 # * Neither the name of the author nor the names of its contributors may be
15 # used to endorse or promote products derived from this software without
16 # specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 # POSSIBILITY OF SUCH DAMAGE.
30 import gzip
31 import html.entities
32 import io
33 import os
34 import re
35 import socket
36 import socks
37 import tempfile
38 import urllib.error
39 import urllib.parse
40 import urllib.request
# Some sites block Python's default User-Agent, so every request presents a
# browser-like one instead.
user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0'
headers = {'User-Agent': user_agent}

# SOCKS5 proxy support: a non-blank "sock_proxy" environment variable of the
# form "[username:password@]host:port" is registered as the default proxy of
# the socks module, and socket.socket is replaced by socks.socksocket.
# A value that does not match this form is ignored.
if os.environ.get("sock_proxy", "").strip():
    proxy_str = os.environ["sock_proxy"].strip()
    m = re.match(
        r"^(?:(?P<username>[^:]+):(?P<password>[^@]+)@)?(?P<host>[^:]+):(?P<port>\w+)$",
        proxy_str,
    )
    if m is not None:
        socks.setdefaultproxy(
            socks.PROXY_TYPE_SOCKS5,
            m.group('host'),
            int(m.group('port')),
            True,
            m.group('username'),
            m.group('password'),
        )
        socket.socket = socks.socksocket
# Matches one hexadecimal (&#x00E9;), decimal (&#233;) or named (&eacute;)
# character reference. Compiled once instead of on every call.
_ENTITY_RE = re.compile(r'&(?:#x(?P<hex>[0-9A-Fa-f]+)|#(?P<dec>\d+)|(?P<name>\w+));')


def htmlentitydecode(s):
    """Return *s* with HTML character references replaced by their characters.

    Named references known to ``html.entities.name2codepoint``, decimal
    references and hexadecimal references are decoded in a single pass, so
    text produced by one replacement is never decoded a second time
    (``&amp;#233;`` yields ``&#233;``, not ``é``).

    References that cannot be decoded (unknown name, code point outside the
    Unicode range) are left in the output unchanged instead of raising.
    """
    def entity2char(m):
        name, hexa, dec = m.group('name', 'hex', 'dec')
        try:
            if name is not None:
                return chr(html.entities.name2codepoint[name])
            if hexa is not None:
                return chr(int(hexa, 16))
            return chr(int(dec))
        except (KeyError, ValueError, OverflowError):
            # Unknown entity name or invalid code point: keep the original text.
            return m.group(0)

    return _ENTITY_RE.sub(entity2char, s)
def retrieve_url(url):
    """Return the content of the page at *url* as a string.

    The request is sent with the module-level ``headers``. A body that starts
    with the gzip magic number is decompressed. The bytes are then decoded
    with the charset announced in the Content-Type header, falling back to
    UTF-8 when no charset is given or the announced one cannot be used;
    undecodable bytes are replaced. Finally HTML entities are expanded with
    ``htmlentitydecode``.

    If opening the URL raises ``urllib.error.URLError``, a
    "Connection error:" message is printed and "" is returned.
    """
    req = urllib.request.Request(url, headers=headers)
    try:
        response = urllib.request.urlopen(req)
    except urllib.error.URLError as errno:
        print(" ".join(("Connection error:", str(errno.reason))))
        return ""

    # Release the connection as soon as the body and headers have been read.
    with response:
        dat = response.read()
        info = response.info()

    # Check if it is gzipped
    if dat[:2] == b'\x1f\x8b':
        # Data is gzip encoded, decode it
        dat = gzip.decompress(dat)

    # get_content_charset() copes with quoted values and extra parameters,
    # which a plain split on 'charset=' does not.
    charset = info.get_content_charset() or 'utf-8'
    try:
        dat = dat.decode(charset, 'replace')
    except (LookupError, ValueError):
        # The server announced a charset Python cannot decode with.
        dat = dat.decode('utf-8', 'replace')
    return htmlentitydecode(dat)
def download_file(url, referer=None):
    """Download the file at *url* into a temporary file.

    The request is sent with the module-level ``headers`` and, when *referer*
    is not None, an additional ``referer`` header. A body that starts with
    the gzip magic number is decompressed before being written.

    Returns a single string made of the temporary file path, a space, and
    *url*. Exceptions raised by ``urllib.request.urlopen`` are not caught.
    """
    req = urllib.request.Request(url, headers=headers)
    if referer is not None:
        req.add_header('referer', referer)

    # Fetch the data before creating the temporary file, so that a failed
    # download leaves neither an open descriptor nor an orphan file behind.
    with urllib.request.urlopen(req) as response:
        dat = response.read()

    # Check if it is gzipped
    if dat[:2] == b'\x1f\x8b':
        # Data is gzip encoded, decode it
        dat = gzip.decompress(dat)

    # Write it to a file
    fd, path = tempfile.mkstemp()
    with os.fdopen(fd, "wb") as out:
        out.write(dat)

    # return file path
    return (path + " " + url)