bridgedb/Stability.py

   1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_Stability -*-
   2 #
   3 # This file is part of BridgeDB, a Tor bridge distribution system.
   4 #
   5 # :authors: please see the AUTHORS file for attributions
   6 # :copyright: (c) 2013-2017, Isis Lovecruft
   7 #             (c) 2013-2017, Matthew Finkel
   8 #             (c) 2012-2017, Aaron Gibson
   9 #             (c) 2007-2017, Nick Mathewson
  10 #             (c) 2007-2017, The Tor Project, Inc.
  11 # :license: see LICENSE for licensing information
  12
  13 """This module provides functionality for tracking bridge stability metrics.
  14
  15 Bridge stability metrics are calculated using the model introduced in
  16 `"An Analysis of Tor Bridge Stability"`_ and
  17 `implemented in the Tor Metrics library`_.
  18
  19 .. An Analysis of Tor Bridge Stability:
  20     https://metrics.torproject.org/papers/bridge-stability-2011-10-31.pdf
  21     Karsten Loesing, An Analysis of Tor Bridge Stability. Technical Report.
  22     The Tor Project, October 2011.
  23
  24 .. implemented in the Tor Metrics library:
  25     https://gitweb.torproject.org/metrics-tasks/task-4255/SimulateBridgeStability.java
  26 """
  27
  28 import logging
  29 import bridgedb.Storage
  30
  31 from bridgedb.schedule import toUnixSeconds
  32
  33
  34 # tunables
  35 weighting_factor = float(19)/float(20)
  36 discountIntervalMillis = 60*60*12*1000
  37
  38
  39 class BridgeHistory(object):
  40     """ Record Class that tracks a single Bridge
  41     The fields stored are:
  42
  43     fingerprint, ip, port, weightedUptime, weightedTime, weightedRunLength,
  44     totalRunWeights, lastSeenWithDifferentAddressAndPort,
  45     lastSeenWithThisAddressAndPort, lastDiscountedHistoryValues.
  46
  47     fingerprint         The Bridge Fingerprint (unicode)
  48     ip                  The Bridge IP (unicode)
  49     port                The Bridge orport (integer)
  50
  51     weightedUptime      Weighted uptime in seconds (long int)
  52     weightedTime        Weighted time in seconds (long int)
  53     weightedRunLength   Weighted run length of previous addresses or ports in
  54                         seconds. (long int)
  55     totalRunWeights     Total run weights of previously used addresses or
  56                         ports. (float)
  57
  58     lastSeenWithDifferentAddressAndPort
  59         Timestamp in milliseconds when this
  60         bridge was last seen with a different address or port. (long int)
  61
  62     lastSeenWithThisAddressAndPort
  63         Timestamp in milliseconds when this bridge was last seen
  64         with this address and port. (long int)
  65
  66     lastDiscountedHistoryValues:
  67         Timestamp in milliseconds when this bridge was last discounted. (long int)
  68
  69     lastUpdatedWeightedTime:
  70         Timestamp in milliseconds when the weighted time was updated. (long int)
  71     """
  72     def __init__(self, fingerprint, ip, port,
  73             weightedUptime, weightedTime, weightedRunLength, totalRunWeights,
  74             lastSeenWithDifferentAddressAndPort, lastSeenWithThisAddressAndPort,
  75             lastDiscountedHistoryValues, lastUpdatedWeightedTime):
  76         self.fingerprint = fingerprint
  77         self.ip = ip
  78         self.port = port
  79         self.weightedUptime = int(weightedUptime)
  80         self.weightedTime = int(weightedTime)
  81         self.weightedRunLength = int(weightedRunLength)
  82         self.totalRunWeights = float(totalRunWeights)
  83         self.lastSeenWithDifferentAddressAndPort = \
  84                 int(lastSeenWithDifferentAddressAndPort)
  85         self.lastSeenWithThisAddressAndPort = int(lastSeenWithThisAddressAndPort)
  86         self.lastDiscountedHistoryValues = int(lastDiscountedHistoryValues)
  87         self.lastUpdatedWeightedTime = int(lastUpdatedWeightedTime)
  88
  89     def discountWeightedFractionalUptimeAndWeightedTime(self, discountUntilMillis):
  90         """ discount weighted times """
  91         if self.lastDiscountedHistoryValues == 0:
  92             self.lastDiscountedHistoryValues = discountUntilMillis
  93         rounds = self.numDiscountRounds(discountUntilMillis)
  94         if rounds > 0:
  95             discount = lambda x: (weighting_factor**rounds)*x
  96             self.weightedUptime = discount(self.weightedUptime)
  97             self.weightedTime = discount(self.weightedTime)
  98             self.weightedRunLength = discount(self.weightedRunLength)
  99             self.totalRunWeights = discount(self.totalRunWeights)
 100
 101             self.lastDiscountedHistoryValues += discountIntervalMillis * rounds
 102         return rounds
 103
 104     def numDiscountRounds(self, discountUntilMillis):
 105         """ return the number of rounds of discounting needed to bring this
 106         history element current """
 107         result = discountUntilMillis - self.lastDiscountedHistoryValues
 108         result = int(result/discountIntervalMillis)
 109         return max(result,0)
 110
 111     @property
 112     def weightedFractionalUptime(self):
 113         """Weighted Fractional Uptime"""
 114         if self.weightedTime <0.0001: return 0
 115         return 10000 * self.weightedUptime / self.weightedTime
 116
 117     @property
 118     def tosa(self):
 119         """the Time On Same Address (TOSA)"""
 120         return ( self.lastSeenWithThisAddressAndPort - \
 121                     self.lastSeenWithDifferentAddressAndPort ) / 1000
 122
 123     @property
 124     def familiar(self):
 125         """
 126         A bridge is 'familiar' if 1/8 of all active bridges have appeared
 127         more recently than it, or if it has been around for a Weighted Time of 8 days.
 128         """
 129         # if this bridge has been around longer than 8 days
 130         if self.weightedTime >= 8 * 24 * 60 * 60:
 131             return True
 132
 133         # return True if self.weightedTime is greater than the weightedTime
 134         # of the > bottom 1/8 all bridges, sorted by weightedTime
 135         with bridgedb.Storage.getDB() as db:
 136             allWeightedTimes = [ bh.weightedTime for bh in db.getAllBridgeHistory()]
 137             numBridges = len(allWeightedTimes)
 138             logging.debug("Got %d weightedTimes", numBridges)
 139             allWeightedTimes.sort()
 140             if self.weightedTime >= allWeightedTimes[numBridges/8]:
 141                 return True
 142             return False
 143
 144     @property
 145     def wmtbac(self):
 146         """Weighted Mean Time Between Address Change"""
 147         totalRunLength = self.weightedRunLength + \
 148                 ((self.lastSeenWithThisAddressAndPort -
 149                 self.lastSeenWithDifferentAddressAndPort) / 1000)
 150
 151         totalWeights = self.totalRunWeights + 1.0
 152         if totalWeights <  0.0001: return 0
 153         assert(isinstance(long,totalRunLength))
 154         assert(isinstance(long,totalWeights))
 155         return totalRunlength / totalWeights
 156
 157 def addOrUpdateBridgeHistory(bridge, timestamp):
 158     with bridgedb.Storage.getDB() as db:
 159         bhe = db.getBridgeHistory(bridge.fingerprint)
 160         if not bhe:
 161             # This is the first status, assume 60 minutes.
 162             secondsSinceLastStatusPublication = 60*60
 163             lastSeenWithDifferentAddressAndPort = timestamp * 1000
 164             lastSeenWithThisAddressAndPort = timestamp * 1000
 165
 166             bhe = BridgeHistory(
 167                     bridge.fingerprint, bridge.address, bridge.orPort,
 168                     0,#weightedUptime
 169                     0,#weightedTime
 170                     0,#weightedRunLength
 171                     0,# totalRunWeights
 172                     lastSeenWithDifferentAddressAndPort, # first timestamnp
 173                     lastSeenWithThisAddressAndPort,
 174                     0,#lastDiscountedHistoryValues,
 175                     0,#lastUpdatedWeightedTime
 176                     )
 177             # first time we have seen this descriptor
 178             db.updateIntoBridgeHistory(bhe)
 179         # Calculate the seconds since the last parsed status.  If this is
 180         # the first status or we haven't seen a status for more than 60
 181         # minutes, assume 60 minutes.
 182         statusPublicationMillis = timestamp * 1000
 183         if (statusPublicationMillis - bhe.lastSeenWithThisAddressAndPort) > 60*60*1000:
 184             secondsSinceLastStatusPublication = 60*60
 185             logging.debug("Capping secondsSinceLastStatusPublication to 1 hour")
 186         # otherwise, roll with it
 187         else:
 188             secondsSinceLastStatusPublication = \
 189                     (statusPublicationMillis - bhe.lastSeenWithThisAddressAndPort)/1000
 190         if secondsSinceLastStatusPublication <= 0 and bhe.weightedTime > 0:
 191             # old descriptor, bail
 192             logging.warn("Received old descriptor for bridge %s with timestamp %d",
 193                     bhe.fingerprint, statusPublicationMillis/1000)
 194             return bhe
 195
 196         # iterate over all known bridges and apply weighting factor
 197         discountAndPruneBridgeHistories(statusPublicationMillis)
 198
 199         # Update the weighted times of bridges
 200         updateWeightedTime(statusPublicationMillis)
 201
 202         # For Running Bridges only:
 203         # compare the stored history against the descriptor and see if the
 204         # bridge has changed its address or port
 205         bhe = db.getBridgeHistory(bridge.fingerprint)
 206
 207         if not bridge.running:
 208             logging.info("%s is not running" % bridge.fingerprint)
 209             return bhe
 210
 211         # Parse the descriptor and see if the address or port changed
 212         # If so, store the weighted run time
 213         if bridge.orport != bhe.port or bridge.ip != bhe.ip:
 214             bhe.totalRunWeights += 1.0;
 215             bhe.weightedRunLength += bhe.tosa
 216             bhe.lastSeenWithDifferentAddressAndPort =\
 217                     bhe.lastSeenWithThisAddressAndPort
 218
 219         # Regardless of whether the bridge is new, kept or changed
 220         # its address and port, raise its WFU times and note its
 221         # current address and port, and that we saw it using them.
 222         bhe.weightedUptime += secondsSinceLastStatusPublication
 223         bhe.lastSeenWithThisAddressAndPort = statusPublicationMillis
 224         bhe.ip = str(bridge.ip)
 225         bhe.port = bridge.orport
 226         return db.updateIntoBridgeHistory(bhe)
 227
 228 def discountAndPruneBridgeHistories(discountUntilMillis):
 229     with bridgedb.Storage.getDB() as db:
 230         bhToRemove = []
 231         bhToUpdate = []
 232
 233         for bh in db.getAllBridgeHistory():
 234             # discount previous values by factor of 0.95 every 12 hours
 235             bh.discountWeightedFractionalUptimeAndWeightedTime(discountUntilMillis)
 236             # give the thing at least 24 hours before pruning it
 237             if bh.weightedFractionalUptime < 1 and bh.weightedTime > 60*60*24:
 238                 logging.debug("Removing bridge from history: %s" % bh.fingerprint)
 239                 bhToRemove.append(bh.fingerprint)
 240             else:
 241                 bhToUpdate.append(bh)
 242
 243         for k in bhToUpdate: db.updateIntoBridgeHistory(k)
 244         for k in bhToRemove: db.delBridgeHistory(k)
 245
 246 def updateWeightedTime(statusPublicationMillis):
 247     bhToUpdate = []
 248     with bridgedb.Storage.getDB() as db:
 249         for bh in db.getBridgesLastUpdatedBefore(statusPublicationMillis):
 250             interval = (statusPublicationMillis - bh.lastUpdatedWeightedTime)/1000
 251             if interval > 0:
 252                 bh.weightedTime += min(3600,interval) # cap to 1hr
 253                 bh.lastUpdatedWeightedTime = statusPublicationMillis
 254                 #db.updateIntoBridgeHistory(bh)
 255                 bhToUpdate.append(bh)
 256
 257         for bh in bhToUpdate:
 258             db.updateIntoBridgeHistory(bh)
 259
 260 def updateBridgeHistory(bridges, timestamps):
 261     """Process all the timestamps and update the bridge stability statistics in
 262     the database.
 263
 264     .. warning: This function is extremely expensive, and will keep getting
 265         more and more expensive, on a linearithmic scale, every time it is
 266         called. Blame the :mod:`bridgedb.Stability` module.
 267
 268     :param dict bridges: All bridges from the descriptors, parsed into
 269         :class:`bridgedb.bridges.Bridge`s.
 270     :param dict timestamps: A dictionary whose keys are bridge fingerprints,
 271         and whose values are lists of integers, each integer being a timestamp
 272         (in seconds since Unix Epoch) for when a descriptor for that bridge
 273         was published.
 274     :rtype: dict
 275     :returns: The original **timestamps**, but which each list of integers
 276         (re)sorted.
 277     """
 278     logging.debug("Beginning bridge stability calculations")
 279     sortedTimestamps = {}
 280
 281     for fingerprint, stamps in timestamps.items():
 282         stamps.sort()
 283         bridge = bridges[fingerprint]
 284         for timestamp in stamps:
 285             logging.debug(
 286                 ("Adding/updating timestamps in BridgeHistory for %s in "
 287                  "database: %s") % (fingerprint, timestamp))
 288             timestamp = toUnixSeconds(timestamp.timetuple())
 289             addOrUpdateBridgeHistory(bridge, timestamp)
 290         # Replace the timestamps so the next sort is (hopefully) less
 291         # expensive:
 292         sortedTimestamps[fingerprint] = stamps
 293
 294     logging.debug("Stability calculations complete")
 295     return sortedTimestamps