Parse bridge blocking info from SQL database.
[tor-bridgedb.git] / bridgedb / Stability.py
blob34ea51d35f74e1438deb1fa531d738b342c19916
1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_Stability -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: please see the AUTHORS file for attributions
6 # :copyright: (c) 2013-2017, Isis Lovecruft
7 # (c) 2013-2017, Matthew Finkel
8 # (c) 2012-2017, Aaron Gibson
9 # (c) 2007-2017, Nick Mathewson
10 # (c) 2007-2017, The Tor Project, Inc.
11 # :license: see LICENSE for licensing information
13 """This module provides functionality for tracking bridge stability metrics.
15 Bridge stability metrics are calculated using the model introduced in
16 `"An Analysis of Tor Bridge Stability"`_ and
17 `implemented in the Tor Metrics library`_.
19 .. An Analysis of Tor Bridge Stability:
20 https://metrics.torproject.org/papers/bridge-stability-2011-10-31.pdf
21 Karsten Loesing, An Analysis of Tor Bridge Stability. Technical Report.
22 The Tor Project, October 2011.
24 .. implemented in the Tor Metrics library:
25 https://gitweb.torproject.org/metrics-tasks/task-4255/SimulateBridgeStability.java
26 """
28 import logging
29 import bridgedb.Storage
31 from bridgedb.schedule import toUnixSeconds
34 # tunables
35 weighting_factor = float(19)/float(20)
36 discountIntervalMillis = 60*60*12*1000
39 class BridgeHistory(object):
40 """ Record Class that tracks a single Bridge
41 The fields stored are:
43 fingerprint, ip, port, weightedUptime, weightedTime, weightedRunLength,
44 totalRunWeights, lastSeenWithDifferentAddressAndPort,
45 lastSeenWithThisAddressAndPort, lastDiscountedHistoryValues.
47 fingerprint The Bridge Fingerprint (unicode)
48 ip The Bridge IP (unicode)
49 port The Bridge orport (integer)
51 weightedUptime Weighted uptime in seconds (long int)
52 weightedTime Weighted time in seconds (long int)
53 weightedRunLength Weighted run length of previous addresses or ports in
54 seconds. (long int)
55 totalRunWeights Total run weights of previously used addresses or
56 ports. (float)
58 lastSeenWithDifferentAddressAndPort
59 Timestamp in milliseconds when this
60 bridge was last seen with a different address or port. (long int)
62 lastSeenWithThisAddressAndPort
63 Timestamp in milliseconds when this bridge was last seen
64 with this address and port. (long int)
66 lastDiscountedHistoryValues:
67 Timestamp in milliseconds when this bridge was last discounted. (long int)
69 lastUpdatedWeightedTime:
70 Timestamp in milliseconds when the weighted time was updated. (long int)
71 """
72 def __init__(self, fingerprint, ip, port,
73 weightedUptime, weightedTime, weightedRunLength, totalRunWeights,
74 lastSeenWithDifferentAddressAndPort, lastSeenWithThisAddressAndPort,
75 lastDiscountedHistoryValues, lastUpdatedWeightedTime):
76 self.fingerprint = fingerprint
77 self.ip = ip
78 self.port = port
79 self.weightedUptime = int(weightedUptime)
80 self.weightedTime = int(weightedTime)
81 self.weightedRunLength = int(weightedRunLength)
82 self.totalRunWeights = float(totalRunWeights)
83 self.lastSeenWithDifferentAddressAndPort = \
84 int(lastSeenWithDifferentAddressAndPort)
85 self.lastSeenWithThisAddressAndPort = int(lastSeenWithThisAddressAndPort)
86 self.lastDiscountedHistoryValues = int(lastDiscountedHistoryValues)
87 self.lastUpdatedWeightedTime = int(lastUpdatedWeightedTime)
89 def discountWeightedFractionalUptimeAndWeightedTime(self, discountUntilMillis):
90 """ discount weighted times """
91 if self.lastDiscountedHistoryValues == 0:
92 self.lastDiscountedHistoryValues = discountUntilMillis
93 rounds = self.numDiscountRounds(discountUntilMillis)
94 if rounds > 0:
95 discount = lambda x: (weighting_factor**rounds)*x
96 self.weightedUptime = discount(self.weightedUptime)
97 self.weightedTime = discount(self.weightedTime)
98 self.weightedRunLength = discount(self.weightedRunLength)
99 self.totalRunWeights = discount(self.totalRunWeights)
101 self.lastDiscountedHistoryValues += discountIntervalMillis * rounds
102 return rounds
104 def numDiscountRounds(self, discountUntilMillis):
105 """ return the number of rounds of discounting needed to bring this
106 history element current """
107 result = discountUntilMillis - self.lastDiscountedHistoryValues
108 result = int(result/discountIntervalMillis)
109 return max(result,0)
111 @property
112 def weightedFractionalUptime(self):
113 """Weighted Fractional Uptime"""
114 if self.weightedTime <0.0001: return 0
115 return 10000 * self.weightedUptime / self.weightedTime
117 @property
118 def tosa(self):
119 """the Time On Same Address (TOSA)"""
120 return ( self.lastSeenWithThisAddressAndPort - \
121 self.lastSeenWithDifferentAddressAndPort ) / 1000
123 @property
124 def familiar(self):
126 A bridge is 'familiar' if 1/8 of all active bridges have appeared
127 more recently than it, or if it has been around for a Weighted Time of 8 days.
129 # if this bridge has been around longer than 8 days
130 if self.weightedTime >= 8 * 24 * 60 * 60:
131 return True
133 # return True if self.weightedTime is greater than the weightedTime
134 # of the > bottom 1/8 all bridges, sorted by weightedTime
135 with bridgedb.Storage.getDB() as db:
136 allWeightedTimes = [ bh.weightedTime for bh in db.getAllBridgeHistory()]
137 numBridges = len(allWeightedTimes)
138 logging.debug("Got %d weightedTimes", numBridges)
139 allWeightedTimes.sort()
140 if self.weightedTime >= allWeightedTimes[numBridges/8]:
141 return True
142 return False
144 @property
145 def wmtbac(self):
146 """Weighted Mean Time Between Address Change"""
147 totalRunLength = self.weightedRunLength + \
148 ((self.lastSeenWithThisAddressAndPort -
149 self.lastSeenWithDifferentAddressAndPort) / 1000)
151 totalWeights = self.totalRunWeights + 1.0
152 if totalWeights < 0.0001: return 0
153 assert(isinstance(long,totalRunLength))
154 assert(isinstance(long,totalWeights))
155 return totalRunlength / totalWeights
157 def addOrUpdateBridgeHistory(bridge, timestamp):
158 with bridgedb.Storage.getDB() as db:
159 bhe = db.getBridgeHistory(bridge.fingerprint)
160 if not bhe:
161 # This is the first status, assume 60 minutes.
162 secondsSinceLastStatusPublication = 60*60
163 lastSeenWithDifferentAddressAndPort = timestamp * 1000
164 lastSeenWithThisAddressAndPort = timestamp * 1000
166 bhe = BridgeHistory(
167 bridge.fingerprint, bridge.address, bridge.orPort,
168 0,#weightedUptime
169 0,#weightedTime
170 0,#weightedRunLength
171 0,# totalRunWeights
172 lastSeenWithDifferentAddressAndPort, # first timestamnp
173 lastSeenWithThisAddressAndPort,
174 0,#lastDiscountedHistoryValues,
175 0,#lastUpdatedWeightedTime
177 # first time we have seen this descriptor
178 db.updateIntoBridgeHistory(bhe)
179 # Calculate the seconds since the last parsed status. If this is
180 # the first status or we haven't seen a status for more than 60
181 # minutes, assume 60 minutes.
182 statusPublicationMillis = timestamp * 1000
183 if (statusPublicationMillis - bhe.lastSeenWithThisAddressAndPort) > 60*60*1000:
184 secondsSinceLastStatusPublication = 60*60
185 logging.debug("Capping secondsSinceLastStatusPublication to 1 hour")
186 # otherwise, roll with it
187 else:
188 secondsSinceLastStatusPublication = \
189 (statusPublicationMillis - bhe.lastSeenWithThisAddressAndPort)/1000
190 if secondsSinceLastStatusPublication <= 0 and bhe.weightedTime > 0:
191 # old descriptor, bail
192 logging.warn("Received old descriptor for bridge %s with timestamp %d",
193 bhe.fingerprint, statusPublicationMillis/1000)
194 return bhe
196 # iterate over all known bridges and apply weighting factor
197 discountAndPruneBridgeHistories(statusPublicationMillis)
199 # Update the weighted times of bridges
200 updateWeightedTime(statusPublicationMillis)
202 # For Running Bridges only:
203 # compare the stored history against the descriptor and see if the
204 # bridge has changed its address or port
205 bhe = db.getBridgeHistory(bridge.fingerprint)
207 if not bridge.running:
208 logging.info("%s is not running" % bridge.fingerprint)
209 return bhe
211 # Parse the descriptor and see if the address or port changed
212 # If so, store the weighted run time
213 if bridge.orport != bhe.port or bridge.ip != bhe.ip:
214 bhe.totalRunWeights += 1.0;
215 bhe.weightedRunLength += bhe.tosa
216 bhe.lastSeenWithDifferentAddressAndPort =\
217 bhe.lastSeenWithThisAddressAndPort
219 # Regardless of whether the bridge is new, kept or changed
220 # its address and port, raise its WFU times and note its
221 # current address and port, and that we saw it using them.
222 bhe.weightedUptime += secondsSinceLastStatusPublication
223 bhe.lastSeenWithThisAddressAndPort = statusPublicationMillis
224 bhe.ip = str(bridge.ip)
225 bhe.port = bridge.orport
226 return db.updateIntoBridgeHistory(bhe)
228 def discountAndPruneBridgeHistories(discountUntilMillis):
229 with bridgedb.Storage.getDB() as db:
230 bhToRemove = []
231 bhToUpdate = []
233 for bh in db.getAllBridgeHistory():
234 # discount previous values by factor of 0.95 every 12 hours
235 bh.discountWeightedFractionalUptimeAndWeightedTime(discountUntilMillis)
236 # give the thing at least 24 hours before pruning it
237 if bh.weightedFractionalUptime < 1 and bh.weightedTime > 60*60*24:
238 logging.debug("Removing bridge from history: %s" % bh.fingerprint)
239 bhToRemove.append(bh.fingerprint)
240 else:
241 bhToUpdate.append(bh)
243 for k in bhToUpdate: db.updateIntoBridgeHistory(k)
244 for k in bhToRemove: db.delBridgeHistory(k)
246 def updateWeightedTime(statusPublicationMillis):
247 bhToUpdate = []
248 with bridgedb.Storage.getDB() as db:
249 for bh in db.getBridgesLastUpdatedBefore(statusPublicationMillis):
250 interval = (statusPublicationMillis - bh.lastUpdatedWeightedTime)/1000
251 if interval > 0:
252 bh.weightedTime += min(3600,interval) # cap to 1hr
253 bh.lastUpdatedWeightedTime = statusPublicationMillis
254 #db.updateIntoBridgeHistory(bh)
255 bhToUpdate.append(bh)
257 for bh in bhToUpdate:
258 db.updateIntoBridgeHistory(bh)
260 def updateBridgeHistory(bridges, timestamps):
261 """Process all the timestamps and update the bridge stability statistics in
262 the database.
264 .. warning: This function is extremely expensive, and will keep getting
265 more and more expensive, on a linearithmic scale, every time it is
266 called. Blame the :mod:`bridgedb.Stability` module.
268 :param dict bridges: All bridges from the descriptors, parsed into
269 :class:`bridgedb.bridges.Bridge`s.
270 :param dict timestamps: A dictionary whose keys are bridge fingerprints,
271 and whose values are lists of integers, each integer being a timestamp
272 (in seconds since Unix Epoch) for when a descriptor for that bridge
273 was published.
274 :rtype: dict
275 :returns: The original **timestamps**, but which each list of integers
276 (re)sorted.
278 logging.debug("Beginning bridge stability calculations")
279 sortedTimestamps = {}
281 for fingerprint, stamps in timestamps.items():
282 stamps.sort()
283 bridge = bridges[fingerprint]
284 for timestamp in stamps:
285 logging.debug(
286 ("Adding/updating timestamps in BridgeHistory for %s in "
287 "database: %s") % (fingerprint, timestamp))
288 timestamp = toUnixSeconds(timestamp.timetuple())
289 addOrUpdateBridgeHistory(bridge, timestamp)
290 # Replace the timestamps so the next sort is (hopefully) less
291 # expensive:
292 sortedTimestamps[fingerprint] = stamps
294 logging.debug("Stability calculations complete")
295 return sortedTimestamps