1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_metrics ; -*-
2 # _____________________________________________________________________________
4 # This file is part of BridgeDB, a Tor bridge distribution system.
6 # :authors: please see included AUTHORS file
7 # :copyright: (c) 2019, The Tor Project, Inc.
8 # (c) 2019, Philipp Winter
9 # :license: see LICENSE for licensing information
10 # _____________________________________________________________________________
12 """API for keeping track of BridgeDB metrics, e.g., the demand for bridges
22 from bridgedb
import geo
23 from bridgedb
.distributors
.common
.http
import getClientIP
24 from bridgedb
.distributors
.email
import request
26 from twisted
.mail
.smtp
import Address
28 # Our data structure to keep track of exit relays. The variable is of type
29 # bridgedb.proxy.ProxySet. We reserve a special country code (determined by
30 # PROXY_CC below) for exit relays and other proxies.
33 # Our custom country code for IP addresses that we couldn't map to a country.
34 # This can happen for private IP addresses or if our geo-location provider has
38 # Our custom country code for IP addresses that are proxies, e.g., Tor exit
39 # relays. The code "zz" is free for assignment for user needs as specified
40 # here: <https://en.wikipedia.org/w/index.php?title=ISO_3166-1_alpha-2&oldid=906611218#Decoding_table>
43 # We use BIN_SIZE to reduce the granularity of our counters. We round up
44 # numbers to the next multiple of BIN_SIZE, e.g., 28 is rounded up to:
# The prefix length that we use to keep track of the number of unique subnets
# we have seen HTTPS requests from. HTTPSMetrics.updateSubnetCounter() appends
# it to the client's IP address ("<ip>/<prefix length>") to derive the subnet.
SUBNET_CTR_PREFIX_LEN = 20

# All of the pluggable transports BridgeDB currently supports. This stays
# None until setSupportedTransports() is called; isBridgeTypeSupported() logs
# an error while it is still None.
SUPPORTED_TRANSPORTS = None
55 # Version number for our metrics format. We increment the version if our
def setProxies(proxies):
    """Set the given proxies.

    The container is stored in the module-level ``PROXIES`` variable, which
    :func:`export` and :func:`resolveCountryCode` consult.

    :type proxies: :class:`~bridgedb.proxy.ProxySet`
    :param proxies: The container for the IP addresses of any currently
        known proxies, e.g., Tor exit relays.
    """
    global PROXIES

    logging.debug("Setting %d proxies." % len(proxies))
    # Without this assignment the module would keep reporting that it has no
    # proxies, no matter how often this function is called.
    PROXIES = proxies
def setSupportedTransports(supportedTransports):
    """Remember which transports BridgeDB supports.

    The argument is stored in the module-level ``SUPPORTED_TRANSPORTS``
    variable, which :func:`isBridgeTypeSupported` checks against.

    :param dict supportedTransports: The transport types that BridgeDB
        supports.
    """
    global SUPPORTED_TRANSPORTS

    numTransports = len(supportedTransports)
    logging.debug("Setting %d supported transports." % numTransports)
    SUPPORTED_TRANSPORTS = supportedTransports
def isBridgeTypeSupported(bridgeType):
    """Return `True' or `False' depending on if the given bridge type is
    supported.

    :param str bridgeType: The bridge type, e.g., "vanilla" or "obfs4".
    :rtype: bool
    :returns: `True' if the bridge type is supported and `False' otherwise.
        If the supported transports have not been set yet, an error is
        logged and `False' is returned for every bridge type.
    """
    if SUPPORTED_TRANSPORTS is None:
        logging.error("Bug: Variable SUPPORTED_TRANSPORTS is None.")
        # Bail out here: the membership test below would raise a TypeError
        # on None.
        return False

    # Note that "vanilla" isn't a transport protocol (in fact, it's the absence
    # of a transport), which is why it isn't in SUPPORTED_TRANSPORTS.
    return (bridgeType in SUPPORTED_TRANSPORTS) or (bridgeType == "vanilla")
def export(fh, measurementInterval):
    """Export metrics by writing them to the given file handle.

    The metrics of all three distribution mechanisms are rotated first. Then
    a ``bridgedb-metrics-end`` line, a ``bridgedb-metrics-version`` line and
    one ``bridgedb-metric-count`` line per metric are written, with the HTTPS
    metrics first, followed by moat and email.

    :param file fh: The file handle to which we're writing our metrics.
    :param int measurementInterval: The number of seconds after which we rotate
        and dump our metrics.
    """
    httpsMetrix = HTTPSMetrics()
    emailMetrix = EmailMetrics()
    moatMetrix = MoatMetrics()

    # Rotate our metrics.
    httpsMetrix.rotate()
    emailMetrix.rotate()
    moatMetrix.rotate()

    numProxies = len(PROXIES) if PROXIES is not None else 0
    if numProxies == 0:
        logging.error("Metrics module doesn't have any proxies.")
    else:
        logging.debug("Metrics module knows about %d proxies." % numProxies)

    # datetime.utcnow() is deprecated; an aware UTC timestamp formats to the
    # very same string.
    now = datetime.datetime.now(datetime.timezone.utc)
    fh.write("bridgedb-metrics-end %s (%d s)\n" % (
        now.strftime("%Y-%m-%d %H:%M:%S"),
        measurementInterval))
    fh.write("bridgedb-metrics-version %d\n" % METRICS_VERSION)

    # The order of this tuple determines the order of the exported lines.
    for metrix in (httpsMetrix, moatMetrix, emailMetrix):
        for line in metrix.getMetrics():
            fh.write("bridgedb-metric-count %s\n" % line)
def resolveCountryCode(ipAddr):
    """Return the country code of the given IP address.

    :param str ipAddr: The IP address to resolve. May be ``None``.

    :rtype: str
    :returns: A two-letter country code. ``UNKNOWN_CC`` is returned if the
        address is ``None`` or cannot be mapped to a country, and ``PROXY_CC``
        is returned if the address is one of our known proxies.
    """
    if ipAddr is None:
        logging.warning("Given IP address was None. Using %s as country "
                        "code." % UNKNOWN_CC)
        return UNKNOWN_CC

    if PROXIES is None:
        # Not fatal: we simply cannot recognise proxies yet and fall through
        # to the regular GeoIP lookup.
        logging.warning("Proxies are not yet set.")
    elif ipAddr in PROXIES:
        return PROXY_CC

    countryCode = geo.getCountryCode(ipaddr.IPAddress(ipAddr))

    # countryCode may be None if GeoIP is unable to map an IP address to a
    # country.
    return UNKNOWN_CC if countryCode is None else countryCode
class Singleton(type):
    """Metaclass that creates at most one instance per class.

    The first call to a class that uses this metaclass instantiates it; every
    later call returns that same instance and ignores its arguments.
    """

    # Maps each class to its one and only instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the instance of ``cls``, creating it on first use."""
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args,
                                                                 **kwargs)
        return cls._instances[cls]

    def clear(cls):
        """Drop the instance (necessary for unit tests)."""
        # pop() with a default tolerates classes that were never
        # instantiated or that were already cleared.
        cls._instances.pop(cls, None)
class Metrics(metaclass=Singleton):
    """Base class representing metrics.

    This class provides functionality that our three distribution mechanisms
    share: counters that are keyed by time series keys, rotation of those
    counters, and export of the binned values.
    """

    def __init__(self, binSize=BIN_SIZE):
        """Create empty hot and cold metrics.

        :param int binSize: Exported values are rounded up to the next
            multiple of this number.
        """
        logging.debug("Instantiating metrics class.")
        self.binSize = binSize

        # Metrics cover a 24 hour period. To that end, we're maintaining two
        # data structures: our "hot" metrics are currently being populated
        # while our "cold" metrics are finished, and valid for 24 hours. After
        # that, our hot metrics turn into cold metrics, and we start over.
        self.hotMetrics = dict()
        self.coldMetrics = dict()

    def rotate(self):
        """Rotate our metrics."""

        self.coldMetrics = self.hotMetrics
        self.hotMetrics = dict()

    def findAnomaly(self, request):
        """Return a string that names the anomaly of the given request.

        :param request: The request to inspect. It is currently ignored.
        :rtype: str
        :returns: Always "none" for now.
        """
        anomaly = "none"

        # TODO: Inspect email for traces of bots, Sherlock Homes-style!
        # See <https://bugs.torproject.org/9316#comment:19> for the rationale.
        # All classes that inherit from Metrics() should implement this method.

        return anomaly

    def getMetrics(self):
        """Get our sanitized current metrics, one per line.

        Metrics are of the form:

            [
             "moat.obfs4.us.success.none 10",
             "https.vanilla.de.success.none 30",
             ...
            ]

        :rtype: list
        :returns: A list of metric lines.
        """
        lines = []
        for key, value in self.coldMetrics.items():
            # Round up our value to the nearest multiple of self.binSize to
            # reduce the accuracy of our real values.
            remainder = value % self.binSize
            if remainder > 0:
                value += self.binSize - remainder
            lines.append("%s %d" % (key, value))
        return lines

    def set(self, key, value):
        """Set the given key to the given value.

        :param str key: The time series key.
        :param int value: The time series value.
        """
        self.hotMetrics[key] = value

    def inc(self, key):
        """Increment the given key.

        A key that we haven't seen before starts out at one.

        :param str key: The time series key.
        """
        if key in self.hotMetrics:
            self.hotMetrics[key] += 1
        else:
            self.set(key, 1)

    def createKey(self, distMechanism, bridgeType, countryOrProvider,
                  success, anomaly):
        """Create and return a time series key.

        :param str distMechanism: A string representing our distribution
            mechanism, e.g., "https".
        :param str bridgeType: A string representing the requested bridge
            type, e.g., "vanilla" or "obfs4".
        :param str countryOrProvider: A string representing the client's
            two-letter country code or email provider, e.g., "it". Bytes are
            decoded as UTF-8.
        :param bool success: ``True`` if the request was successful and
            BridgeDB handed out a bridge; ``False`` otherwise.
        :param str anomaly: A string representing the type of anomaly, as
            returned by :meth:`findAnomaly`.
        :rtype: str
        :returns: A key that uniquely identifies the given metrics
            combination, e.g., "https.vanilla.de.success.none".
        """
        if isinstance(countryOrProvider, bytes):
            countryOrProvider = countryOrProvider.decode('utf-8')

        countryOrProvider = countryOrProvider.lower()
        bridgeType = bridgeType.lower()
        success = "success" if success else "fail"

        key = "%s.%s.%s.%s.%s" % (distMechanism, bridgeType,
                                  countryOrProvider, success, anomaly)

        return key
class HTTPSMetrics(Metrics):
    """Metrics for requests that arrive over BridgeDB's HTTPS interface."""

    def __init__(self):
        super(HTTPSMetrics, self).__init__()

        # Maps subnets (e.g., "1.2.0.0/16") to the number of times we've seen
        # requests from the given subnet.
        self.subnetCounter = dict()
        self.keyPrefix = "https"

    def getTopNSubnets(self, n=10):
        """Return the subnets that we have seen the most requests from.

        :param int n: The maximum number of subnets to return.
        :rtype: list
        :returns: Up to ``n`` tuples of the form (subnet, number of
            requests), sorted by the number of requests in descending order.
        """
        sortedByNum = sorted(self.subnetCounter.items(),
                             key=operator.itemgetter(1),
                             reverse=True)
        return sortedByNum[:n]

    def _recordHTTPSRequest(self, request, success):
        """Update our subnet counter and our metrics for the given request.

        Requests for an unsupported transport type are logged but not
        counted.

        :param request: The HTTPS request. We read its ``requestHeaders`` and
            its ``args``.
        :param bool success: ``True`` if the request was valid; ``False``
            otherwise.
        """
        logging.debug("HTTPS request has user agent: %s" %
                      request.requestHeaders.getRawHeaders("User-Agent"))

        # Pull the client's IP address out of the request and convert it to a
        # two-letter country code.
        ipAddr = getClientIP(request,
                             useForwardedHeader=True,
                             skipLoopback=False)
        self.updateSubnetCounter(ipAddr)
        countryCode = resolveCountryCode(ipAddr)

        transports = request.args.get("transport", list())
        if len(transports) > 1:
            logging.warning("Expected a maximum of one transport but %d are "
                            "given." % len(transports))

        # No transport, an empty transport and "0" all mean vanilla.
        if len(transports) == 0:
            bridgeType = "vanilla"
        elif transports[0] == "" or transports[0] == "0":
            bridgeType = "vanilla"
        else:
            bridgeType = transports[0]

        # BridgeDB's HTTPS interface exposes transport types as a drop down
        # menu but users can still request anything by manipulating HTTP
        # parameters.
        if not isBridgeTypeSupported(bridgeType):
            logging.warning("User requested unsupported transport type %s "
                            "over HTTPS." % bridgeType)
            return

        logging.debug("Recording %svalid HTTPS request for %s from %s (%s)." %
                      ("" if success else "in",
                       bridgeType, ipAddr, countryCode))

        # Now update our metrics.
        key = self.createKey(self.keyPrefix, bridgeType, countryCode,
                             success, self.findAnomaly(request))
        self.inc(key)

    def recordValidHTTPSRequest(self, request):
        """Record the given HTTPS request as successful."""
        self._recordHTTPSRequest(request, True)

    def recordInvalidHTTPSRequest(self, request):
        """Record the given HTTPS request as failed."""
        self._recordHTTPSRequest(request, False)

    def updateSubnetCounter(self, ipAddr):
        """Count a request from the subnet that the given address is in.

        :param str ipAddr: The client's IP address. ``None`` is ignored.
        """
        # We may have been unable to determine the client's address, in which
        # case the string concatenation below would raise a TypeError.
        if ipAddr is None:
            return

        nw = ipaddr.IPNetwork(ipAddr + "/" + str(SUBNET_CTR_PREFIX_LEN),
                              strict=False)
        subnet = nw.network.compressed
        logging.debug("Updating subnet counter with %s" % subnet)

        num = self.subnetCounter.get(subnet, 0)
        self.subnetCounter[subnet] = num + 1
class EmailMetrics(Metrics):
    """Metrics for requests that arrive over BridgeDB's email interface."""

    def __init__(self):
        super(EmailMetrics, self).__init__()
        self.keyPrefix = "email"

    def _recordEmailRequest(self, smtpAutoresp, success):
        """Update our metrics for the given email request.

        Requests for an unsupported transport type are logged but not
        counted.

        :param smtpAutoresp: The autoresponder that handles the email. We use
            its ``getMailTo()`` method and its ``incoming.lines`` attribute.
        :param bool success: ``True`` if the request was valid; ``False``
            otherwise.
        """
        emailAddrs = smtpAutoresp.getMailTo()
        if len(emailAddrs) == 0:
            # This is just for unit tests.
            emailAddr = Address("foo@gmail.com")
        else:
            emailAddr = emailAddrs[0]

        # Get the requested transport protocol.
        br = request.determineBridgeRequestOptions(smtpAutoresp.incoming.lines)
        bridgeType = "vanilla" if not len(br.transports) else br.transports[0]

        # Over email, transports are requested by typing them. Typos happen
        # and users can request anything, really.
        if not isBridgeTypeSupported(bridgeType):
            logging.warning("User requested unsupported transport type %s "
                            "over email." % bridgeType)
            return

        logging.debug("Recording %svalid email request for %s from %s." %
                      ("" if success else "in", bridgeType, emailAddr))
        # The domain is bytes, which createKey() decodes for us.
        sld = emailAddr.domain.split(b".")[0]

        # Now update our metrics.
        # NOTE(review): `request` here is the imported
        # bridgedb.distributors.email.request module rather than a request
        # object. findAnomaly() currently ignores its argument, so this is
        # harmless -- confirm before findAnomaly() starts inspecting it.
        key = self.createKey(self.keyPrefix, bridgeType, sld, success,
                             self.findAnomaly(request))
        self.inc(key)

    def recordValidEmailRequest(self, smtpAutoresp):
        """Record the given email request as successful."""
        self._recordEmailRequest(smtpAutoresp, True)

    def recordInvalidEmailRequest(self, smtpAutoresp):
        """Record the given email request as failed."""
        self._recordEmailRequest(smtpAutoresp, False)
class MoatMetrics(Metrics):
    """Metrics for requests that arrive over BridgeDB's moat interface."""

    def __init__(self):
        super(MoatMetrics, self).__init__()
        self.keyPrefix = "moat"

    def _recordMoatRequest(self, request, success):
        """Update our metrics for the given moat request.

        Requests that we cannot decode and requests for an unsupported
        transport type are logged but not counted.

        :param request: The moat request. We read its ``requestHeaders`` and
            its ``content``, which is expected to hold JSON of the form
            ``{"data": [{"transport": ...}]}``.
        :param bool success: ``True`` if the request was valid; ``False``
            otherwise.
        """
        logging.debug("Moat request has user agent: %s" %
                      request.requestHeaders.getRawHeaders("User-Agent"))

        ipAddr = getClientIP(request,
                             useForwardedHeader=True,
                             skipLoopback=False)
        countryCode = resolveCountryCode(ipAddr)

        # The request body is controlled by the client, so decoding it is
        # deliberately best-effort: whatever goes wrong, we log it and skip
        # the request instead of failing.
        try:
            encodedClientData = request.content.read()
            clientData = json.loads(encodedClientData)["data"][0]
            transport = clientData["transport"]
            bridgeType = "vanilla" if not len(transport) else transport
        except Exception as err:
            logging.warning("Could not decode request: %s" % err)
            return

        if not isBridgeTypeSupported(bridgeType):
            logging.warning("User requested unsupported transport type %s "
                            "over moat." % bridgeType)
            return

        logging.debug("Recording %svalid moat request for %s from %s (%s)." %
                      ("" if success else "in",
                       bridgeType, ipAddr, countryCode))

        # Now update our metrics.
        key = self.createKey(self.keyPrefix, bridgeType,
                             countryCode, success, self.findAnomaly(request))
        self.inc(key)

    def recordValidMoatRequest(self, request):
        """Record the given moat request as successful."""
        self._recordMoatRequest(request, True)

    def recordInvalidMoatRequest(self, request):
        """Record the given moat request as failed."""
        self._recordMoatRequest(request, False)