1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_Stability -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: please see the AUTHORS file for attributions
6 # :copyright: (c) 2013-2017, Isis Lovecruft
7 # (c) 2013-2017, Matthew Finkel
8 # (c) 2012-2017, Aaron Gibson
9 # (c) 2007-2017, Nick Mathewson
10 # (c) 2007-2017, The Tor Project, Inc.
11 # :license: see LICENSE for licensing information
13 """This module provides functionality for tracking bridge stability metrics.
15 Bridge stability metrics are calculated using the model introduced in
16 `"An Analysis of Tor Bridge Stability"`_ and
17 `implemented in the Tor Metrics library`_.
19 .. An Analysis of Tor Bridge Stability:
20 https://metrics.torproject.org/papers/bridge-stability-2011-10-31.pdf
21 Karsten Loesing, An Analysis of Tor Bridge Stability. Technical Report.
22 The Tor Project, October 2011.
24 .. implemented in the Tor Metrics library:
25 https://gitweb.torproject.org/metrics-tasks/task-4255/SimulateBridgeStability.java
29 import bridgedb
.Storage
31 from bridgedb
.schedule
import toUnixSeconds
35 weighting_factor
= float(19)/float(20)
36 discountIntervalMillis
= 60*60*12*1000
39 class BridgeHistory(object):
40 """ Record Class that tracks a single Bridge
41 The fields stored are:
43 fingerprint, ip, port, weightedUptime, weightedTime, weightedRunLength,
44 totalRunWeights, lastSeenWithDifferentAddressAndPort,
45 lastSeenWithThisAddressAndPort, lastDiscountedHistoryValues.
47 fingerprint The Bridge Fingerprint (unicode)
48 ip The Bridge IP (unicode)
49 port The Bridge orport (integer)
51 weightedUptime Weighted uptime in seconds (long int)
52 weightedTime Weighted time in seconds (long int)
53 weightedRunLength Weighted run length of previous addresses or ports in
55 totalRunWeights Total run weights of previously used addresses or
58 lastSeenWithDifferentAddressAndPort
59 Timestamp in milliseconds when this
60 bridge was last seen with a different address or port. (long int)
62 lastSeenWithThisAddressAndPort
63 Timestamp in milliseconds when this bridge was last seen
64 with this address and port. (long int)
66 lastDiscountedHistoryValues:
67 Timestamp in milliseconds when this bridge was last discounted. (long int)
69 lastUpdatedWeightedTime:
70 Timestamp in milliseconds when the weighted time was updated. (long int)
72 def __init__(self
, fingerprint
, ip
, port
,
73 weightedUptime
, weightedTime
, weightedRunLength
, totalRunWeights
,
74 lastSeenWithDifferentAddressAndPort
, lastSeenWithThisAddressAndPort
,
75 lastDiscountedHistoryValues
, lastUpdatedWeightedTime
):
76 self
.fingerprint
= fingerprint
79 self
.weightedUptime
= int(weightedUptime
)
80 self
.weightedTime
= int(weightedTime
)
81 self
.weightedRunLength
= int(weightedRunLength
)
82 self
.totalRunWeights
= float(totalRunWeights
)
83 self
.lastSeenWithDifferentAddressAndPort
= \
84 int(lastSeenWithDifferentAddressAndPort
)
85 self
.lastSeenWithThisAddressAndPort
= int(lastSeenWithThisAddressAndPort
)
86 self
.lastDiscountedHistoryValues
= int(lastDiscountedHistoryValues
)
87 self
.lastUpdatedWeightedTime
= int(lastUpdatedWeightedTime
)
89 def discountWeightedFractionalUptimeAndWeightedTime(self
, discountUntilMillis
):
90 """ discount weighted times """
91 if self
.lastDiscountedHistoryValues
== 0:
92 self
.lastDiscountedHistoryValues
= discountUntilMillis
93 rounds
= self
.numDiscountRounds(discountUntilMillis
)
95 discount
= lambda x
: (weighting_factor
**rounds
)*x
96 self
.weightedUptime
= discount(self
.weightedUptime
)
97 self
.weightedTime
= discount(self
.weightedTime
)
98 self
.weightedRunLength
= discount(self
.weightedRunLength
)
99 self
.totalRunWeights
= discount(self
.totalRunWeights
)
101 self
.lastDiscountedHistoryValues
+= discountIntervalMillis
* rounds
104 def numDiscountRounds(self
, discountUntilMillis
):
105 """ return the number of rounds of discounting needed to bring this
106 history element current """
107 result
= discountUntilMillis
- self
.lastDiscountedHistoryValues
108 result
= int(result
/discountIntervalMillis
)
112 def weightedFractionalUptime(self
):
113 """Weighted Fractional Uptime"""
114 if self
.weightedTime
<0.0001: return 0
115 return 10000 * self
.weightedUptime
/ self
.weightedTime
119 """the Time On Same Address (TOSA)"""
120 return ( self
.lastSeenWithThisAddressAndPort
- \
121 self
.lastSeenWithDifferentAddressAndPort
) / 1000
126 A bridge is 'familiar' if 1/8 of all active bridges have appeared
127 more recently than it, or if it has been around for a Weighted Time of 8 days.
129 # if this bridge has been around longer than 8 days
130 if self
.weightedTime
>= 8 * 24 * 60 * 60:
133 # return True if self.weightedTime is greater than the weightedTime
134 # of the > bottom 1/8 all bridges, sorted by weightedTime
135 with bridgedb
.Storage
.getDB() as db
:
136 allWeightedTimes
= [ bh
.weightedTime
for bh
in db
.getAllBridgeHistory()]
137 numBridges
= len(allWeightedTimes
)
138 logging
.debug("Got %d weightedTimes", numBridges
)
139 allWeightedTimes
.sort()
140 if self
.weightedTime
>= allWeightedTimes
[numBridges
/8]:
146 """Weighted Mean Time Between Address Change"""
147 totalRunLength
= self
.weightedRunLength
+ \
148 ((self
.lastSeenWithThisAddressAndPort
-
149 self
.lastSeenWithDifferentAddressAndPort
) / 1000)
151 totalWeights
= self
.totalRunWeights
+ 1.0
152 if totalWeights
< 0.0001: return 0
153 assert(isinstance(long,totalRunLength
))
154 assert(isinstance(long,totalWeights
))
155 return totalRunlength
/ totalWeights
157 def addOrUpdateBridgeHistory(bridge
, timestamp
):
158 with bridgedb
.Storage
.getDB() as db
:
159 bhe
= db
.getBridgeHistory(bridge
.fingerprint
)
161 # This is the first status, assume 60 minutes.
162 secondsSinceLastStatusPublication
= 60*60
163 lastSeenWithDifferentAddressAndPort
= timestamp
* 1000
164 lastSeenWithThisAddressAndPort
= timestamp
* 1000
167 bridge
.fingerprint
, bridge
.address
, bridge
.orPort
,
172 lastSeenWithDifferentAddressAndPort
, # first timestamnp
173 lastSeenWithThisAddressAndPort
,
174 0,#lastDiscountedHistoryValues,
175 0,#lastUpdatedWeightedTime
177 # first time we have seen this descriptor
178 db
.updateIntoBridgeHistory(bhe
)
179 # Calculate the seconds since the last parsed status. If this is
180 # the first status or we haven't seen a status for more than 60
181 # minutes, assume 60 minutes.
182 statusPublicationMillis
= timestamp
* 1000
183 if (statusPublicationMillis
- bhe
.lastSeenWithThisAddressAndPort
) > 60*60*1000:
184 secondsSinceLastStatusPublication
= 60*60
185 logging
.debug("Capping secondsSinceLastStatusPublication to 1 hour")
186 # otherwise, roll with it
188 secondsSinceLastStatusPublication
= \
189 (statusPublicationMillis
- bhe
.lastSeenWithThisAddressAndPort
)/1000
190 if secondsSinceLastStatusPublication
<= 0 and bhe
.weightedTime
> 0:
191 # old descriptor, bail
192 logging
.warn("Received old descriptor for bridge %s with timestamp %d",
193 bhe
.fingerprint
, statusPublicationMillis
/1000)
196 # iterate over all known bridges and apply weighting factor
197 discountAndPruneBridgeHistories(statusPublicationMillis
)
199 # Update the weighted times of bridges
200 updateWeightedTime(statusPublicationMillis
)
202 # For Running Bridges only:
203 # compare the stored history against the descriptor and see if the
204 # bridge has changed its address or port
205 bhe
= db
.getBridgeHistory(bridge
.fingerprint
)
207 if not bridge
.running
:
208 logging
.info("%s is not running" % bridge
.fingerprint
)
211 # Parse the descriptor and see if the address or port changed
212 # If so, store the weighted run time
213 if bridge
.orport
!= bhe
.port
or bridge
.ip
!= bhe
.ip
:
214 bhe
.totalRunWeights
+= 1.0;
215 bhe
.weightedRunLength
+= bhe
.tosa
216 bhe
.lastSeenWithDifferentAddressAndPort
=\
217 bhe
.lastSeenWithThisAddressAndPort
219 # Regardless of whether the bridge is new, kept or changed
220 # its address and port, raise its WFU times and note its
221 # current address and port, and that we saw it using them.
222 bhe
.weightedUptime
+= secondsSinceLastStatusPublication
223 bhe
.lastSeenWithThisAddressAndPort
= statusPublicationMillis
224 bhe
.ip
= str(bridge
.ip
)
225 bhe
.port
= bridge
.orport
226 return db
.updateIntoBridgeHistory(bhe
)
228 def discountAndPruneBridgeHistories(discountUntilMillis
):
229 with bridgedb
.Storage
.getDB() as db
:
233 for bh
in db
.getAllBridgeHistory():
234 # discount previous values by factor of 0.95 every 12 hours
235 bh
.discountWeightedFractionalUptimeAndWeightedTime(discountUntilMillis
)
236 # give the thing at least 24 hours before pruning it
237 if bh
.weightedFractionalUptime
< 1 and bh
.weightedTime
> 60*60*24:
238 logging
.debug("Removing bridge from history: %s" % bh
.fingerprint
)
239 bhToRemove
.append(bh
.fingerprint
)
241 bhToUpdate
.append(bh
)
243 for k
in bhToUpdate
: db
.updateIntoBridgeHistory(k
)
244 for k
in bhToRemove
: db
.delBridgeHistory(k
)
246 def updateWeightedTime(statusPublicationMillis
):
248 with bridgedb
.Storage
.getDB() as db
:
249 for bh
in db
.getBridgesLastUpdatedBefore(statusPublicationMillis
):
250 interval
= (statusPublicationMillis
- bh
.lastUpdatedWeightedTime
)/1000
252 bh
.weightedTime
+= min(3600,interval
) # cap to 1hr
253 bh
.lastUpdatedWeightedTime
= statusPublicationMillis
254 #db.updateIntoBridgeHistory(bh)
255 bhToUpdate
.append(bh
)
257 for bh
in bhToUpdate
:
258 db
.updateIntoBridgeHistory(bh
)
260 def updateBridgeHistory(bridges
, timestamps
):
261 """Process all the timestamps and update the bridge stability statistics in
264 .. warning: This function is extremely expensive, and will keep getting
265 more and more expensive, on a linearithmic scale, every time it is
266 called. Blame the :mod:`bridgedb.Stability` module.
268 :param dict bridges: All bridges from the descriptors, parsed into
269 :class:`bridgedb.bridges.Bridge`s.
270 :param dict timestamps: A dictionary whose keys are bridge fingerprints,
271 and whose values are lists of integers, each integer being a timestamp
272 (in seconds since Unix Epoch) for when a descriptor for that bridge
275 :returns: The original **timestamps**, but which each list of integers
278 logging
.debug("Beginning bridge stability calculations")
279 sortedTimestamps
= {}
281 for fingerprint
, stamps
in timestamps
.items():
283 bridge
= bridges
[fingerprint
]
284 for timestamp
in stamps
:
286 ("Adding/updating timestamps in BridgeHistory for %s in "
287 "database: %s") % (fingerprint
, timestamp
))
288 timestamp
= toUnixSeconds(timestamp
.timetuple())
289 addOrUpdateBridgeHistory(bridge
, timestamp
)
290 # Replace the timestamps so the next sort is (hopefully) less
292 sortedTimestamps
[fingerprint
] = stamps
294 logging
.debug("Stability calculations complete")
295 return sortedTimestamps