1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_metrics ; -*-
2 # _____________________________________________________________________________
4 # This file is part of BridgeDB, a Tor bridge distribution system.
6 # :authors: please see included AUTHORS file
7 # :copyright: (c) 2019, The Tor Project, Inc.
8 # (c) 2019, Philipp Winter
9 # :license: see LICENSE for licensing information
10 # _____________________________________________________________________________
"""API for keeping track of BridgeDB metrics, e.g., the demand for bridges
over time.
"""
22 from bridgedb
import geo
23 from bridgedb
.distributors
.common
.http
import getClientIP
24 from bridgedb
.distributors
.email
import request
25 from bridgedb
.distributors
.email
.distributor
import EmailRequestedHelp
27 from twisted
.mail
.smtp
import Address
29 # Our data structure to keep track of exit relays. The variable is of type
30 # bridgedb.proxy.ProxySet. We reserve a special country code (determined by
31 # PROXY_CC below) for exit relays and other proxies.
# Our custom country code for IP addresses that we couldn't map to a country.
# This can happen for private IP addresses or if our geo-location provider has
# no mapping for the address.
39 # Our custom country code for IP addresses that are proxies, e.g., Tor exit
40 # relays. The code "zz" is free for assignment for user needs as specified
41 # here: <https://en.wikipedia.org/w/index.php?title=ISO_3166-1_alpha-2&oldid=906611218#Decoding_table>
# We use BIN_SIZE to reduce the granularity of our counters.  We round up
# numbers to the next multiple of BIN_SIZE, e.g., with a BIN_SIZE of 10, 28 is
# rounded up to 30.
# The prefix length that we use to keep track of the number of unique subnets
# we have seen HTTPS requests from.  HTTPSMetrics.updateSubnetCounter() appends
# it to the client's IP address ("<ip>/<prefix length>") to derive the subnet
# that is counted.
SUBNET_CTR_PREFIX_LEN = 20

# All of the pluggable transports BridgeDB currently supports.  This remains
# None until setSupportedTransports() is called.
SUPPORTED_TRANSPORTS = None
# Version number for our metrics format.  We increment the version if our
# metrics format changes.
def setProxies(proxies):
    """Set the given proxies.

    The container is stored in the module-level ``PROXIES`` variable, which
    :func:`resolveCountryCode` and :func:`export` consult.

    :type proxies: :class:`~bridgedb.proxy.ProxySet`
    :param proxies: The container for the IP addresses of any currently
        known proxies.
    """
    global PROXIES

    logging.debug("Setting %d proxies." % len(proxies))
    # Logging alone has no effect; the container must be remembered so that
    # the rest of the module can look up IP addresses in it.
    PROXIES = proxies
def setSupportedTransports(supportedTransports):
    """Set the given supported transports.

    The value is stored in the module-level ``SUPPORTED_TRANSPORTS``
    variable.

    :param dict supportedTransports: The transport types that BridgeDB
        supports.
    """
    global SUPPORTED_TRANSPORTS

    numTransports = len(supportedTransports)
    logging.debug("Setting %d supported transports." % numTransports)
    SUPPORTED_TRANSPORTS = supportedTransports
def isBridgeTypeSupported(bridgeType):
    """Return `True' or `False' depending on if the given bridge type is
    supported.

    :param str bridgeType: The bridge type, e.g., "vanilla" or "obfs4".
    :rtype: bool
    :returns: ``True`` if ``bridgeType`` is "vanilla" or is contained in
        ``SUPPORTED_TRANSPORTS``; ``False`` otherwise, including when
        ``SUPPORTED_TRANSPORTS`` has not been set yet.
    """
    if SUPPORTED_TRANSPORTS is None:
        logging.error("Bug: Variable SUPPORTED_TRANSPORTS is None.")
        # Bail out here: the membership test below would raise a TypeError
        # because None is not a container.
        return False

    # Note that "vanilla" isn't a transport protocol (in fact, it's the absence
    # of a transport), which is why it isn't in SUPPORTED_TRANSPORTS.
    return (bridgeType in SUPPORTED_TRANSPORTS) or (bridgeType == "vanilla")
def export(fh, measurementInterval):
    """Export metrics by writing them to the given file handle.

    The output starts with a ``bridgedb-metrics-end`` line (current UTC time
    and the measurement interval) and a ``bridgedb-metrics-version`` line,
    followed by one ``bridgedb-metric-count`` line per metric: first HTTPS,
    then moat, then email.

    :param file fh: The file handle to which we're writing our metrics.
    :param int measurementInterval: The number of seconds after which we rotate
        and dump our metrics.
    """
    httpsMetrix = HTTPSMetrics()
    emailMetrix = EmailMetrics()
    moatMetrix = MoatMetrics()

    # Rotate our metrics.  getMetrics() only reports "cold" metrics, so the
    # counters collected so far must be rotated before we can export them.
    for metrix in (httpsMetrix, emailMetrix, moatMetrix):
        metrix.rotate()

    numProxies = len(PROXIES) if PROXIES is not None else 0
    if numProxies == 0:
        logging.error("Metrics module doesn't have any proxies.")
    else:
        logging.debug("Metrics module knows about %d proxies." % numProxies)

    # A timezone-aware "now" in UTC formats to exactly the same string as the
    # deprecated datetime.utcnow().
    now = datetime.datetime.now(datetime.timezone.utc)
    fh.write("bridgedb-metrics-end %s (%d s)\n" % (
        now.strftime("%Y-%m-%d %H:%M:%S"),
        measurementInterval))
    fh.write("bridgedb-metrics-version %d\n" % METRICS_VERSION)

    # The order (HTTPS, moat, email) determines the order of the lines in the
    # exported file.
    for metrix in (httpsMetrix, moatMetrix, emailMetrix):
        for line in metrix.getMetrics():
            fh.write("bridgedb-metric-count %s\n" % line)
def resolveCountryCode(ipAddr):
    """Return the country code of the given IP address.

    :param str ipAddr: The IP address to resolve.  May be ``None``.

    :rtype: str
    :returns: A two-letter country code.  This is ``UNKNOWN_CC`` if the
        address is ``None`` or can't be mapped to a country, and ``PROXY_CC``
        if the address is contained in ``PROXIES``.
    """
    if ipAddr is None:
        logging.warning("Given IP address was None.  Using %s as country "
                        "code." % UNKNOWN_CC)
        return UNKNOWN_CC

    if PROXIES is None:
        # Without a proxy list we cannot recognise proxies; fall through to
        # the regular geo-location lookup.
        logging.warning("Proxies are not yet set.")
    elif ipAddr in PROXIES:
        return PROXY_CC

    countryCode = geo.getCountryCode(ipaddr.IPAddress(ipAddr))

    # countryCode may be None if GeoIP is unable to map an IP address to a
    # country.
    return UNKNOWN_CC if countryCode is None else countryCode
class Singleton(type):
    """Metaclass that gives every class using it a single, shared instance."""

    # Maps each class that uses this metaclass to its one and only instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the instance of ``cls``, creating it on first use.

        The arguments are only passed to the constructor when the instance is
        created; subsequent calls return the existing instance unchanged.
        """
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args,
                                                                 **kwargs)
        return cls._instances[cls]

    def clear(cls):
        """Drop the instance (necessary for unit tests)."""
        # pop() with a default keeps this a no-op if no instance exists yet.
        cls._instances.pop(cls, None)
class Metrics(metaclass=Singleton):
    """Base class representing metrics.

    This class provides functionality that our three distribution mechanisms
    (HTTPS, email, and moat) share.  It uses the :class:`Singleton` metaclass,
    so instantiating a (sub)class repeatedly yields the same object.
    """

    def __init__(self, binSize=BIN_SIZE):
        """Create empty hot and cold metrics.

        :param int binSize: Counters are rounded up to the next multiple of
            this number when they are reported by :meth:`getMetrics`.
        """
        logging.debug("Instantiating metrics class.")
        self.binSize = binSize

        # Metrics cover a 24 hour period.  To that end, we're maintaining two
        # data structures: our "hot" metrics are currently being populated
        # while our "cold" metrics are finished, and valid for 24 hours.  After
        # that, our hot metrics turn into cold metrics, and we start over.
        self.hotMetrics = dict()
        self.coldMetrics = dict()

    def rotate(self):
        """Rotate our metrics.

        The hot metrics become the cold metrics (replacing the previous cold
        metrics) and a fresh, empty set of hot metrics is started.
        """
        self.coldMetrics = self.hotMetrics
        self.hotMetrics = dict()

    def findAnomaly(self, request):
        """Return a string describing the anomaly of the given request.

        :param request: The request to inspect.  It is currently unused.
        :rtype: str
        :returns: Currently always ``"none"``.
        """
        anomaly = "none"

        # TODO: Inspect email for traces of bots, Sherlock Homes-style!
        # See <https://bugs.torproject.org/9316#comment:19> for the rationale.
        # All classes that inherit from Metrics() should implement this method.

        return anomaly

    def getMetrics(self):
        """Get our sanitized current metrics, one per line.

        Only the cold metrics are reported.  Metrics are of the form:

            [
             "moat.obfs4.us.success.none 10",
             "https.vanilla.de.success.none 30",
             ...
            ]

        :rtype: list
        :returns: A list of metric lines.
        """
        lines = []
        for key, value in self.coldMetrics.items():
            # Round up our value to the nearest multiple of self.binSize to
            # reduce the accuracy of our real values.
            if (value % self.binSize) > 0:
                value += self.binSize - (value % self.binSize)
            lines.append("%s %d" % (key, value))
        return lines

    def set(self, key, value):
        """Set the given key to the given value.

        :param str key: The time series key.
        :param int value: The time series value.
        """
        self.hotMetrics[key] = value

    def inc(self, key):
        """Increment the given key.

        A key that we haven't seen before starts out at 1.

        :param str key: The time series key.
        """
        if key in self.hotMetrics:
            self.hotMetrics[key] += 1
        else:
            self.set(key, 1)

    def createKey(self, distMechanism, bridgeType, countryOrProvider,
                  success, anomaly):
        """Create and return a time series key.

        :param str distMechanism: A string representing our distribution
            mechanism, e.g., "https".
        :param str bridgeType: A string representing the requested bridge
            type, e.g., "vanilla" or "obfs4".
        :param countryOrProvider: A string (or UTF-8 encoded bytes)
            representing the client's two-letter country code or email
            provider, e.g., "it" or "gmail".
        :param bool success: ``True`` if the request was successful and
            BridgeDB handed out a bridge; ``False`` otherwise.
        :param str anomaly: A string representing the type of anomaly, as
            returned by :meth:`findAnomaly`.

        :rtype: str
        :returns: A key that uniquely identifies the given metrics
            combination, of the form
            "<mechanism>.<type>.<country or provider>.<success|fail>.<anomaly>".
        """
        # Email providers arrive as bytes; everything else is already a str.
        if isinstance(countryOrProvider, bytes):
            countryOrProvider = countryOrProvider.decode('utf-8')

        countryOrProvider = countryOrProvider.lower()
        bridgeType = bridgeType.lower()
        success = "success" if success else "fail"

        key = "%s.%s.%s.%s.%s" % (distMechanism, bridgeType,
                                  countryOrProvider, success, anomaly)

        return key
class HTTPSMetrics(Metrics):
    """Metrics for bridge requests that arrive over the HTTPS distributor."""

    def __init__(self):
        super(HTTPSMetrics, self).__init__()

        # Maps subnets (e.g., "1.2.0.0/16") to the number of times we've seen
        # requests from the given subnet.
        self.subnetCounter = dict()
        self.keyPrefix = "https"

    def getTopNSubnets(self, n=10):
        """Return the subnets from which we saw the most requests.

        :param int n: The maximum number of subnets to return.
        :rtype: list
        :returns: Up to ``n`` tuples of the form ``(subnet, numRequests)``,
            sorted by the number of requests in descending order.
        """
        sortedByNum = sorted(self.subnetCounter.items(),
                             key=operator.itemgetter(1),
                             reverse=True)
        return sortedByNum[:n]

    def _recordHTTPSRequest(self, request, success):
        """Update our metrics with the given HTTPS request.

        Requests for unsupported transport types are logged but not counted.

        :param request: The incoming HTTPS request.
        :param bool success: ``True`` if the request was valid; ``False``
            otherwise.
        """
        logging.debug("HTTPS request has user agent: %s" %
                      request.requestHeaders.getRawHeaders("User-Agent"))

        # Pull the client's IP address out of the request and convert it to a
        # two-letter country code.
        ipAddr = getClientIP(request,
                             useForwardedHeader=True,
                             skipLoopback=False)
        self.updateSubnetCounter(ipAddr)
        countryCode = resolveCountryCode(ipAddr)

        transports = request.args.get("transport", list())
        if len(transports) > 1:
            logging.warning("Expected a maximum of one transport but %d are "
                            "given." % len(transports))

        # No transport, an empty string, and "0" all mean "vanilla".
        if not transports or transports[0] in ("", "0"):
            bridgeType = "vanilla"
        else:
            bridgeType = transports[0]

        # BridgeDB's HTTPS interface exposes transport types as a drop down
        # menu but users can still request anything by manipulating HTTP
        # parameters.
        if not isBridgeTypeSupported(bridgeType):
            logging.warning("User requested unsupported transport type %s "
                            "over HTTPS." % bridgeType)
            return

        logging.debug("Recording %svalid HTTPS request for %s from %s (%s)." %
                      ("" if success else "in",
                       bridgeType, ipAddr, countryCode))

        # Now update our metrics.
        key = self.createKey(self.keyPrefix, bridgeType, countryCode,
                             success, self.findAnomaly(request))
        self.inc(key)

    def recordValidHTTPSRequest(self, request):
        """Record the given HTTPS request as a valid request."""
        self._recordHTTPSRequest(request, True)

    def recordInvalidHTTPSRequest(self, request):
        """Record the given HTTPS request as an invalid request."""
        self._recordHTTPSRequest(request, False)

    def updateSubnetCounter(self, ipAddr):
        """Count a request from the subnet that the given address belongs to.

        :param str ipAddr: The client's IP address.  ``None`` is ignored.
        """
        # We cannot derive a subnet without an address.
        if ipAddr is None:
            return

        nw = ipaddr.IPNetwork(ipAddr + "/" + str(SUBNET_CTR_PREFIX_LEN),
                              strict=False)
        subnet = nw.network.compressed
        logging.debug("Updating subnet counter with %s" % subnet)

        num = self.subnetCounter.get(subnet, 0)
        self.subnetCounter[subnet] = num + 1
class EmailMetrics(Metrics):
    """Metrics for bridge requests that arrive over the email distributor."""

    def __init__(self):
        super(EmailMetrics, self).__init__()
        self.keyPrefix = "email"

    def _recordEmailRequest(self, smtpAutoresp, success):
        """Update our metrics with the given email request.

        Requests for help and requests for unsupported transport types are
        not counted.

        :param smtpAutoresp: The autoresponder handling the incoming email.
            It must provide ``getMailTo()`` and ``incoming.lines``.
        :param bool success: ``True`` if the request was valid; ``False``
            otherwise.
        """
        emailAddrs = smtpAutoresp.getMailTo()
        if not emailAddrs:
            # This is just for unit tests.
            emailAddr = Address("foo@gmail.com")
        else:
            emailAddr = emailAddrs[0]

        # Get the requested transport protocol.
        try:
            br = request.determineBridgeRequestOptions(
                smtpAutoresp.incoming.lines)
        except EmailRequestedHelp:
            # The user asked for help rather than for bridges.
            return
        bridgeType = "vanilla" if not len(br.transports) else br.transports[0]

        # Over email, transports are requested by typing them.  Typos happen
        # and users can request anything, really.
        if not isBridgeTypeSupported(bridgeType):
            logging.warning("User requested unsupported transport type %s "
                            "over email." % bridgeType)
            return

        logging.debug("Recording %svalid email request for %s from %s." %
                      ("" if success else "in", bridgeType, emailAddr))
        # The first label of the email domain identifies the provider, e.g.,
        # b"gmail" for b"gmail.com".
        sld = emailAddr.domain.split(b".")[0]

        # Now update our metrics.  Note that `request' is the imported
        # bridgedb.distributors.email.request module in this scope, so we
        # hand the actual email object to findAnomaly() instead.
        key = self.createKey(self.keyPrefix, bridgeType, sld, success,
                             self.findAnomaly(smtpAutoresp))
        self.inc(key)

    def recordValidEmailRequest(self, smtpAutoresp):
        """Record the given email request as a valid request."""
        self._recordEmailRequest(smtpAutoresp, True)

    def recordInvalidEmailRequest(self, smtpAutoresp):
        """Record the given email request as an invalid request."""
        self._recordEmailRequest(smtpAutoresp, False)
class MoatMetrics(Metrics):
    """Metrics for bridge requests that arrive over the moat distributor."""

    def __init__(self):
        super(MoatMetrics, self).__init__()
        self.keyPrefix = "moat"

    def _recordMoatRequest(self, request, success):
        """Update our metrics with the given moat request.

        Requests that cannot be decoded and requests for unsupported
        transport types are logged but not counted.

        :param request: The incoming moat request.  Its body is read via
            ``request.content.read()`` and parsed as JSON.
        :param bool success: ``True`` if the request was valid; ``False``
            otherwise.
        """
        logging.debug("Moat request has user agent: %s" %
                      request.requestHeaders.getRawHeaders("User-Agent"))

        ipAddr = getClientIP(request,
                             useForwardedHeader=True,
                             skipLoopback=False)
        countryCode = resolveCountryCode(ipAddr)

        # The body is client-controlled, so anything can go wrong while
        # decoding it.  Recording metrics is best-effort: we log the problem
        # and skip the request instead of failing.
        try:
            encodedClientData = request.content.read()
            clientData = json.loads(encodedClientData)["data"][0]
            transport = clientData["transport"]
            bridgeType = "vanilla" if not len(transport) else transport
        except Exception as err:
            logging.warning("Could not decode request: %s" % err)
            return

        if not isBridgeTypeSupported(bridgeType):
            logging.warning("User requested unsupported transport type %s "
                            "over moat." % bridgeType)
            return

        logging.debug("Recording %svalid moat request for %s from %s (%s)." %
                      ("" if success else "in",
                       bridgeType, ipAddr, countryCode))

        # Now update our metrics.
        key = self.createKey(self.keyPrefix, bridgeType,
                             countryCode, success, self.findAnomaly(request))
        self.inc(key)

    def recordValidMoatRequest(self, request):
        """Record the given moat request as a valid request."""
        self._recordMoatRequest(request, True)

    def recordInvalidMoatRequest(self, request):
        """Record the given moat request as an invalid request."""
        self._recordMoatRequest(request, False)