# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_metrics ; -*-
# _____________________________________________________________________________
#
# This file is part of BridgeDB, a Tor bridge distribution system.
#
# :authors: please see included AUTHORS file
# :copyright: (c) 2019, The Tor Project, Inc.
#             (c) 2019, Philipp Winter
# :license: see LICENSE for licensing information
# _____________________________________________________________________________

"""Functions for dealing with bot requests."""

# Maps transport types and IP version (e.g., "obfs4v4", "vanillav4", or
# "vanillav6") to lists of bridge lines (e.g., ["1.2.3.4:1234 ..."]).
# Replaced wholesale by loadDecoyBridges() and read by getDecoyBridge().  It
# starts out empty so that a lookup made before any decoy file has been loaded
# simply finds nothing instead of failing on an undefined name.
DECOY_BRIDGES = {}

# Maps HTTP request headers (e.g., "Accept-Language") to compiled regular
# expressions that suggest that the request was issued by a bot (e.g.,
# "[Kk]lingon").  Replaced wholesale by loadBlacklistedRequestHeaders() and
# read by isRequestFromBot().
BLACKLISTED_REQUEST_HEADERS = {}


26 def _loadCSV(filename
):
27 """Load and return the content of the given CSV file.
29 :param str filename: The filename to read.
31 :returns: A dictionary mapping keys (first column) to values (second
37 with
open(filename
) as fh
:
38 for line
in fh
.readlines():
39 if line
.count(",") != 1:
40 logging
.warning("Line must have exactly one comma: %s" %
43 key
, value
= line
.split(",")
44 csv
[key
.strip()] = value
.strip()
45 except IOError as err
:
46 logging
.warning("I/O error while reading from file %s: %s" %
def loadBlacklistedRequestHeaders(filename):
    """Load and globally set a dictionary of blacklisted request headers.

    The file is read with :func:`_loadCSV`; each key is a request header name
    and each value is a regular expression.  Expressions that fail to compile
    are logged and skipped, so one bad entry does not discard the rest.

    :param str filename: The filename to read.
    """
    global BLACKLISTED_REQUEST_HEADERS

    content = _loadCSV(filename)
    blacklisted = {}
    # Turn dictionary values into compiled regular expressions.
    for header, regexp in content.items():
        try:
            blacklisted[header] = re.compile(regexp)
        except Exception as err:
            # Deliberately broad: loading is best-effort, and compiling a
            # pattern can fail with more than just re.error.
            logging.warning("Skipping regexp %s because we couldn't compile "
                            "it: %s", regexp, err)

    # Swap in the complete mapping only once it has been fully built.
    BLACKLISTED_REQUEST_HEADERS = blacklisted


def loadDecoyBridges(filename):
    """Load and globally set a dictionary of decoy bridges.

    The file is read with :func:`_loadCSV`; each key is a transport type plus
    IP version (e.g., "obfs4v4") and each value is a single bridge line.

    :param str filename: The filename to read.
    """
    # Without this declaration the assignment below would only bind a local
    # name and the module-level dictionary would never be updated.
    global DECOY_BRIDGES

    d = _loadCSV(filename)
    # Turn our bridge lines (which are strings) into lists.
    decoyBridges = {ttype: [line] for ttype, line in d.items()}

    DECOY_BRIDGES = decoyBridges


def getDecoyBridge(transport, ipVersion):
    """Return a decoy bridge or, if none is available, None.

    :param str transport: The desired transport, e.g., "vanilla" or "obfs4".
    :param int ipVersion: The IP version, which must be either 4 or 6.
    :rtype: list
    :returns: Return a list of bridge lines or, if we don't have any, None.
        None is also returned if ``ipVersion`` is neither 4 nor 6.
    """
    # Reject unsupported IP versions up front; this also keeps the "%d"
    # formatting of the lookup key below from seeing a non-numeric value.
    if ipVersion not in (4, 6):
        return None

    logging.info("Returning IPv%d decoy bridge for transport %s.",
                 ipVersion, transport)
    # Keys look like "obfs4v4" or "vanillav6".
    return DECOY_BRIDGES.get("%sv%d" % (transport, ipVersion))


def isRequestFromBot(request):
    """Determine if the given request is coming from a bot.

    Each header listed in ``BLACKLISTED_REQUEST_HEADERS`` is looked up in the
    request, and its value is searched with the associated regular
    expression.  The first match classifies the request as a bot request.

    :type request: :api:`twisted.web.http.Request`
    :param request: A ``Request`` object, including POST arguments which
        should include two key/value pairs.
    :rtype: bool
    :returns: True if the request is coming from a bot and False otherwise.
    """
    for header, badRegexp in BLACKLISTED_REQUEST_HEADERS.items():
        value = request.getHeader(header)
        # A header that the client did not send yields None, which cannot be
        # searched with a regular expression; it simply cannot match.
        if value is None:
            continue

        if badRegexp.search(value) is not None:
            logging.info("Found bot request. Headers: %s",
                         request.requestHeaders)
            return True

    return False