Correctly handle "torrent finished" events
[qBittorrent.git] / src / searchengine / nova3 / helpers.py
blobef8376a28d536c860a45ef8fc4115be1b62db9b2
1 #VERSION: 1.49
3 # Author:
4 # Christophe DUMEZ (chris@qbittorrent.org)
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # * Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 # * Neither the name of the author nor the names of its contributors may be
15 # used to endorse or promote products derived from this software without
16 # specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 # POSSIBILITY OF SUCH DAMAGE.
30 import datetime
31 import gzip
32 import html.entities
33 import io
34 import os
35 import re
36 import socket
37 import socks
38 import sys
39 import tempfile
40 import urllib.error
41 import urllib.request
42 from collections.abc import Mapping
43 from typing import Any, Optional
def getBrowserUserAgent() -> str:
    """ Build a Firefox-looking User-Agent string, used to circumvent website blocking """

    # The Firefox major version is estimated from today's date: starting from
    # the anchor point below (version 125 on 2024-04-16), one additional major
    # version is counted for every 30 full days elapsed.
    #
    # Firefox release calendar
    # https://whattrainisitnow.com/calendar/
    # https://wiki.mozilla.org/index.php?title=Release_Management/Calendar&redirect=no
    referenceVersion = 125
    referenceDate = datetime.date(2024, 4, 16)

    elapsed = datetime.date.today() - referenceDate
    version = referenceVersion + (elapsed.days // 30)

    return f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{version}.0) Gecko/20100101 Firefox/{version}.0"
# Default HTTP headers used for the requests built in this module
headers: dict[str, Any] = {'User-Agent': getBrowserUserAgent()}

# SOCKS5 Proxy support
# The proxy is taken from the "sock_proxy" environment variable and is expected
# to look like "[username:password@]host:port". A blank value, or a value that
# does not match this format, leaves the socket module untouched.
if os.environ.get("sock_proxy", "").strip():
    proxy_str = os.environ["sock_proxy"].strip()
    m = re.match(
        r"^(?:(?P<username>[^:]+):(?P<password>[^@]+)@)?(?P<host>[^:]+):(?P<port>\w+)$",
        proxy_str,
    )
    if m is not None:
        socks.setdefaultproxy(
            socks.PROXY_TYPE_SOCKS5,
            m.group('host'),
            int(m.group('port')),
            True,
            m.group('username'),
            m.group('password'),
        )
        # Every socket created through socket.socket from now on is a SOCKS socket
        socket.socket = socks.socksocket  # type: ignore[misc]
def htmlentitydecode(s: str) -> str:
    """ Replace the HTML character references found in `s` by the characters they stand for

    Three passes are applied, in this order: named entities (such as &eacute;),
    decimal references (such as &#233;) and hexadecimal references (such as
    &#x00E9;). A reference that cannot be converted (unknown name, malformed
    number, value that `chr()` rejects) is left untouched in the result instead
    of raising an exception.
    """

    def codepoint2char(m: re.Match[str], base: int) -> str:
        # int() raises ValueError on malformed digits (e.g. "&#xZZ;") and chr()
        # raises ValueError/OverflowError on values it cannot represent; in both
        # cases keep the original text rather than failing the whole decoding.
        try:
            return chr(int(m.group(1), base))
        except (ValueError, OverflowError):
            return m.group(0)

    # First convert alpha entities (such as &eacute;)
    # (Inspired from http://mail.python.org/pipermail/python-list/2007-June/443813.html)
    # The pattern is built from the known entity names, so every match is
    # guaranteed to be present in the lookup table.
    t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint),
               lambda m: chr(html.entities.name2codepoint[m.group(1)]), s)

    # Then convert numerical entities (such as &#233;)
    t = re.sub(r'&#(\d+);', lambda m: codepoint2char(m, 10), t)

    # Then convert hexa entities (such as &#x00E9;)
    return re.sub(r'&#x(\w+);', lambda m: codepoint2char(m, 16), t)
def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None) -> str:
    """ Return the content of the url page as a string

    `custom_headers` are merged over the module-level `headers`; on a key
    present in both, the value from `custom_headers` is used. The default
    mapping is only read, never modified. `request_data` is handed to
    `urllib.request.Request` as the request data.

    When opening the url raises `urllib.error.URLError`, the reason is printed
    to stderr and an empty string is returned. Otherwise the body is
    gunzipped if it starts with the gzip magic number, decoded to text and
    passed through `htmlentitydecode`.
    """

    request = urllib.request.Request(url, request_data, {**headers, **custom_headers})
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.URLError as err:
        print(f"Connection error: {err.reason}", file=sys.stderr)
        return ""

    # Make sure the connection is released once the body has been read
    with response:
        data: bytes = response.read()
        # Parses the charset parameter of the Content-Type header (handles
        # quoting and additional parameters); None when it is not specified
        charset = response.headers.get_content_charset()

    # Check if it is gzipped
    if data[:2] == b'\x1f\x8b':
        # Data is gzip encoded, decode it
        with io.BytesIO(data) as compressedStream, gzip.GzipFile(fileobj=compressedStream) as gzipper:
            data = gzipper.read()

    try:
        dataStr = data.decode(charset or 'utf-8', 'replace')
    except LookupError:
        # The server announced a charset unknown to Python: fall back to UTF-8
        dataStr = data.decode('utf-8', 'replace')

    return htmlentitydecode(dataStr)
def download_file(url: str, referer: Optional[str] = None) -> str:
    """ Download file at url and write it to a temporary file

    The request is sent with the module-level `headers`, plus a 'referer'
    header when `referer` is not None. The body is gunzipped if it starts
    with the gzip magic number, then written to a file created with
    `tempfile.mkstemp()`.

    Return a single string made of the path to the file and the url, separated
    by a space. Exceptions raised while opening the url are not caught here.
    """

    # Download url
    request = urllib.request.Request(url, headers=headers)
    if referer is not None:
        request.add_header('referer', referer)
    # Make sure the connection is released once the body has been read
    with urllib.request.urlopen(request) as response:
        data = response.read()

    # Check if it is gzipped
    if data[:2] == b'\x1f\x8b':
        # Data is gzip encoded, decode it
        with io.BytesIO(data) as compressedStream, gzip.GzipFile(fileobj=compressedStream) as gzipper:
            data = gzipper.read()

    # Write it to a file
    fileHandle, path = tempfile.mkstemp()
    with os.fdopen(fileHandle, "wb") as file:
        file.write(data)

    # return file path
    return f"{path} {url}"