Make getCaptchaImage return (bytes, str).
[tor-bridgedb.git] / bridgedb / antibot.py
blobe724c68003b00f827a1336d1f39c9aab5b69d77f
1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_metrics ; -*-
2 # _____________________________________________________________________________
4 # This file is part of BridgeDB, a Tor bridge distribution system.
6 # :authors: please see included AUTHORS file
7 # :copyright: (c) 2019, The Tor Project, Inc.
8 # (c) 2019, Philipp Winter
9 # :license: see LICENSE for licensing information
10 # _____________________________________________________________________________
12 """Functions for dealing with bot requests."""
14 import re
15 import logging
# Decoy bridges, keyed by transport type and IP version (e.g., "obfs4v4",
# "vanillav4", or "vanillav6").  The values are bridge lines (e.g.,
# "1.2.3.4:1234 ...").  Starts out empty.
DECOY_BRIDGES = dict()

# Blacklisted request headers, keyed by HTTP request header name (e.g.,
# "Accept-Language").  The values are regular expressions that suggest that
# the request was issued by a bot (e.g., "[Kk]lingon").  Starts out empty.
BLACKLISTED_REQUEST_HEADERS = dict()
def _loadCSV(filename):
    """Load and return the content of the given CSV file.

    Every entry must consist of exactly two comma-separated columns.  Lines
    that are empty (or contain only whitespace) are skipped silently; any
    other line that does not contain exactly one comma is skipped with a
    warning.  Surrounding whitespace is stripped from both columns.  If a key
    occurs more than once, the last occurrence wins.

    If the file cannot be opened or read, a warning is logged and whatever
    was parsed up to that point (possibly nothing) is returned.

    :param str filename: The filename to read.
    :rtype: dict
    :returns: A dictionary mapping keys (first column) to values (second
        column).
    """

    content = dict()
    try:
        with open(filename) as fh:
            # Iterate over the file handle directly; there is no need to
            # build a list of all lines first.
            for line in fh:
                line = line.strip()
                if not line:
                    # A blank line (e.g., at the end of the file) is not a
                    # malformed entry, so don't warn about it.
                    continue
                if line.count(",") != 1:
                    logging.warning("Line must have exactly one comma: %s",
                                    line)
                    continue
                key, value = line.split(",")
                content[key.strip()] = value.strip()
    except IOError as err:
        logging.warning("I/O error while reading from file %s: %s",
                        filename, err)

    return content
def loadBlacklistedRequestHeaders(filename):
    """Read blacklisted request headers from a file and set them globally.

    The file is parsed with :func:`_loadCSV`.  Each value is compiled as a
    regular expression and stored under its key; values that cannot be
    compiled are skipped with a warning.  The resulting dictionary replaces
    the module-level ``BLACKLISTED_REQUEST_HEADERS``.

    :param str filename: The filename to read.
    """
    global BLACKLISTED_REQUEST_HEADERS

    compiled = {}
    for name, pattern in _loadCSV(filename).items():
        # Store the compiled pattern object rather than the raw string.
        try:
            compiled[name] = re.compile(pattern)
        except Exception as err:
            logging.warning("Skipping regexp %s because we couldn't compile "
                            "it: %s" % (pattern, err))

    BLACKLISTED_REQUEST_HEADERS = compiled
def loadDecoyBridges(filename):
    """Read decoy bridges from a file and set them globally.

    The file is parsed with :func:`_loadCSV`.  Each value (a bridge line
    string) is wrapped in a one-element list, and the resulting dictionary
    replaces the module-level ``DECOY_BRIDGES``.

    :param str filename: The filename to read.
    """
    global DECOY_BRIDGES

    bridges = {}
    for ttype, bridgeLine in _loadCSV(filename).items():
        # Bridge lines are handed out as lists, so wrap the single string.
        bridges[ttype] = [bridgeLine]

    DECOY_BRIDGES = bridges
def getDecoyBridge(transport, ipVersion):
    """Look up the decoy bridge for a transport and IP version.

    The lookup key is the transport followed by "v" and the IP version (e.g.,
    "obfs4v4").  Note that the info message is logged before the lookup, so
    it is emitted even if no decoy bridge is stored under that key.

    :param str transport: The desired transport, e.g., "vanilla" or "obfs4".
    :param int ipVersion: The IP version, which must be either 4 or 6.
    :rtype: list
    :returns: The entry stored in ``DECOY_BRIDGES`` for the requested
        transport and IP version, or None if there is no such entry or if
        the IP version is neither 4 nor 6.
    """
    if ipVersion in (4, 6):
        logging.info("Returning IPv%d decoy bridge for transport %s." %
                     (ipVersion, transport))
        key = "%sv%d" % (transport, ipVersion)
        return DECOY_BRIDGES.get(key)

    # Anything other than IPv4 or IPv6 is not supported.
    return None
def isRequestFromBot(request):
    """Determine if the given request is coming from a bot.

    For every entry in ``BLACKLISTED_REQUEST_HEADERS``, the header of that
    name is fetched from the request and searched with the entry's regular
    expression.  Headers for which the request returns None are ignored.  The
    first match is logged and ends the search.

    :type request: :api:`twisted.web.http.Request`
    :param request: The ``Request`` object whose headers are inspected.
    :rtype: bool
    :returns: True if the request is coming from a bot and False otherwise.
    """
    for name, pattern in BLACKLISTED_REQUEST_HEADERS.items():
        observed = request.getHeader(name)
        matched = (observed is not None and
                   pattern.search(observed) is not None)
        if matched:
            logging.info("Found bot request. Headers: %s" %
                         request.requestHeaders)
            return True

    return False