1 import java
.io
.BufferedWriter
;
3 import java
.io
.FileWriter
;
4 import java
.text
.SimpleDateFormat
;
5 import java
.util
.ArrayList
;
6 import java
.util
.Arrays
;
7 import java
.util
.HashMap
;
8 import java
.util
.Iterator
;
11 import java
.util
.SortedMap
;
12 import java
.util
.SortedSet
;
13 import java
.util
.TimeZone
;
14 import java
.util
.TreeMap
;
15 import java
.util
.TreeSet
;
17 import org
.torproject
.descriptor
.Descriptor
;
18 import org
.torproject
.descriptor
.DescriptorFile
;
19 import org
.torproject
.descriptor
.DescriptorReader
;
20 import org
.torproject
.descriptor
.DescriptorSourceFactory
;
21 import org
.torproject
.descriptor
.NetworkStatusEntry
;
22 import org
.torproject
.descriptor
.RelayNetworkStatusConsensus
;
23 import org
.torproject
.descriptor
.ServerDescriptor
;
26 * Calculate path-selection probabilities for relays based on consensus
27 * weights and advertised bandwidths:
29 * - advertised_bandwidth_fraction: Relative advertised bandwidth of
30 * this relay compared to the total advertised bandwidth in the
31 * network. If there were no bandwidth authorities, this fraction
32 * would be the probability of this relay to be selected by clients.
34 * - consensus_weight_fraction: Fraction of this relay's consensus weight
35 * compared to the sum of all consensus weights in the network. This
36 * fraction is a very rough approximation of the probability of this
37 * relay to be selected by clients.
39 * - guard_probability: Probability of this relay to be selected for the
40 * guard position. This probability is calculated based on consensus
41 * weights, relay flags, directory ports, and bandwidth weights in the
42 * consensus. Path selection depends on more factors, so that this
43 * probability can only be an approximation.
45 * - middle_probability: Probability of this relay to be selected for the
46 * middle position. This probability is calculated based on consensus
47 * weights, relay flags, directory ports, and bandwidth weights in the
48 * consensus. Path selection depends on more factors, so that this
49 * probability can only be an approximation.
51 * - exit_probability: Probability of this relay to be selected for the
52 * exit position. This probability is calculated based on consensus
53 * weights, relay flags, directory ports, and bandwidth weights in the
54 * consensus. Path selection depends on more factors, so that this
55 * probability can only be an approximation.
57 * - exit_probability_advbw: Probability of this relay to be selected for
58 * the exit position under the assumption that clients would use
59 * advertised bandwidth for path selection.
61 public class CalculatePathSelectionProbabilities
{
62 public static void main(String
[] args
) throws Exception
{
64 /* Note: change to true if raw weights shall be written to disk. */
65 boolean writeRawWeights
= false;
67 /* Read advertised bandwidths of all server descriptors in
68 * in/server-descriptors/ to memory. This is a rather brute-force
69 * approach, but it's fine for running this analysis. */
70 DescriptorReader descriptorReader
=
71 DescriptorSourceFactory
.createDescriptorReader();
72 descriptorReader
.addDirectory(new File("in/server-descriptors"));
73 Iterator
<DescriptorFile
> descriptorFiles
=
74 descriptorReader
.readDescriptors();
75 Map
<String
, Integer
> serverDescriptors
=
76 new HashMap
<String
, Integer
>();
77 while (descriptorFiles
.hasNext()) {
78 DescriptorFile descriptorFile
= descriptorFiles
.next();
79 for (Descriptor descriptor
: descriptorFile
.getDescriptors()) {
80 if (!(descriptor
instanceof ServerDescriptor
)) {
83 ServerDescriptor serverDescriptor
= (ServerDescriptor
) descriptor
;
84 String digest
= serverDescriptor
.getServerDescriptorDigest();
85 int advertisedBandwidth
= Math
.min(Math
.min(
86 serverDescriptor
.getBandwidthBurst(),
87 serverDescriptor
.getBandwidthObserved()),
88 serverDescriptor
.getBandwidthRate());
89 serverDescriptors
.put(digest
.toUpperCase(), advertisedBandwidth
);
93 /* Go through consensuses in in/consensuses/ in arbitrary order and
94 * calculate the five path-selection probabilities for each of them.
95 * Write results for a given consensuses to a single new line per
96 * relay to out.csv. */
97 descriptorReader
= DescriptorSourceFactory
.createDescriptorReader();
98 descriptorReader
.addDirectory(new File("in/consensuses"));
99 descriptorFiles
= descriptorReader
.readDescriptors();
100 BufferedWriter bw
= null;
101 if (writeRawWeights
) {
102 bw
= new BufferedWriter(new FileWriter("weights.csv"));
103 bw
.write("validafter,fingerprint,advertised_bandwidth_fraction,"
104 + "consensus_weight_fraction,guard_probability,"
105 + "middle_probability,exit_probability,"
106 + "exit_probability_advbw\n");
108 BufferedWriter bw2
= new BufferedWriter(new FileWriter(
109 "cumulated-weights.csv"));
110 bw2
.write("validafter,weight_type,top_relays,"
111 + "total_exit_probability\n");
112 BufferedWriter bw3
= new BufferedWriter(new FileWriter(
113 "inverse-cumulated-weights.csv"));
114 bw3
.write("validafter,weight_type,total_exit_probability,"
116 while (descriptorFiles
.hasNext()) {
117 DescriptorFile descriptorFile
= descriptorFiles
.next();
118 for (Descriptor descriptor
: descriptorFile
.getDescriptors()) {
119 if (!(descriptor
instanceof RelayNetworkStatusConsensus
)) {
122 RelayNetworkStatusConsensus consensus
=
123 (RelayNetworkStatusConsensus
) descriptor
;
125 /* Extract valid-after time and bandwidth weights from the parsed
127 SimpleDateFormat dateTimeFormat
= new SimpleDateFormat(
128 "yyyy-MM-dd HH:mm:ss");
129 dateTimeFormat
.setTimeZone(TimeZone
.getTimeZone("UTC"));
130 String validAfter
= dateTimeFormat
.format(
131 consensus
.getValidAfterMillis());
132 SortedMap
<String
, Integer
> bandwidthWeights
=
133 consensus
.getBandwidthWeights();
134 if (bandwidthWeights
== null) {
135 /* Consensus doesn't contain any bandwidth weights. */
138 SortedSet
<String
> weightKeys
= new TreeSet
<String
>(Arrays
.asList((
139 "Wgg,Wgm,Wgd,Wmg,Wmm,Wme,Wmd,Weg,Wem,Wee,Wed,Wgb,Wmb,Web,Wdb,"
140 + "Wbg,Wbm,Wbe,Wbd").split(",")));
141 weightKeys
.removeAll(bandwidthWeights
.keySet());
142 if (!weightKeys
.isEmpty()) {
143 /* Consensus is missing at least some required bandwidth
147 double wgg
= ((double) bandwidthWeights
.get("Wgg")) / 10000.0,
148 wgd
= ((double) bandwidthWeights
.get("Wgd")) / 10000.0,
149 wmg
= ((double) bandwidthWeights
.get("Wmg")) / 10000.0,
150 wmm
= ((double) bandwidthWeights
.get("Wmm")) / 10000.0,
151 wme
= ((double) bandwidthWeights
.get("Wme")) / 10000.0,
152 wmd
= ((double) bandwidthWeights
.get("Wmd")) / 10000.0,
153 wee
= ((double) bandwidthWeights
.get("Wee")) / 10000.0,
154 wed
= ((double) bandwidthWeights
.get("Wed")) / 10000.0;
156 /* Go through network statuses and calculate the five weights for
157 * each of them. Also sum up totals to calculate probabilities
159 SortedMap
<String
, Double
>
160 advertisedBandwidths
= new TreeMap
<String
, Double
>(),
161 consensusWeights
= new TreeMap
<String
, Double
>(),
162 guardWeights
= new TreeMap
<String
, Double
>(),
163 middleWeights
= new TreeMap
<String
, Double
>(),
164 exitWeights
= new TreeMap
<String
, Double
>(),
165 exitWeightsAdvBw
= new TreeMap
<String
, Double
>();
166 double totalAdvertisedBandwidth
= 0.0;
167 double totalConsensusWeight
= 0.0;
168 double totalGuardWeight
= 0.0;
169 double totalMiddleWeight
= 0.0;
170 double totalExitWeight
= 0.0;
171 double totalExitWeightAdvBw
= 0.0;
172 for (NetworkStatusEntry relay
:
173 consensus
.getStatusEntries().values()) {
174 String fingerprint
= relay
.getFingerprint();
175 if (!relay
.getFlags().contains("Running")) {
178 boolean isExit
= relay
.getFlags().contains("Exit") &&
179 !relay
.getFlags().contains("BadExit");
180 boolean isGuard
= relay
.getFlags().contains("Guard");
181 String serverDescriptorDigest
= relay
.getDescriptor().
183 double advertisedBandwidth
;
184 if (!serverDescriptors
.containsKey(serverDescriptorDigest
)) {
185 advertisedBandwidth
= 0.0;
187 advertisedBandwidth
= (double) serverDescriptors
.get(
188 serverDescriptorDigest
);
190 double consensusWeight
= (double) relay
.getBandwidth();
191 double guardWeight
= (double) relay
.getBandwidth();
192 double middleWeight
= (double) relay
.getBandwidth();
193 double exitWeight
= (double) relay
.getBandwidth();
194 double exitWeightAdvBw
= advertisedBandwidth
;
196 /* Case 1: relay has both Guard and Exit flag and could be
197 * selected for either guard, middle, or exit position. Apply
198 * bandwidth weights W?d. */
199 if (isGuard
&& isExit
) {
203 exitWeightAdvBw
*= wed
;
205 /* Case 2: relay only has the Guard flag, not the Exit flag.
206 * While, in theory, the relay could also be picked for the exit
207 * position (if it has a weird exit policy), Weg is hard-coded
208 * to 0 here. Otherwise, relays with exit policy reject *:*
209 * would show up as possible exits, which makes no sense. Maybe
210 * this is too much of an oversimplification? For the other
211 * positions, apply bandwidth weights W?g. */
212 } else if (isGuard
) {
216 exitWeightAdvBw
= 0.0;
218 /* Case 3: relay only has the Exit flag, not the Guard flag. It
219 * cannot be picked for the guard position, so set Wge to 0.
220 * For the other positions, apply bandwidth weights W?e. */
225 exitWeightAdvBw
*= wee
;
227 /* Case 4: relay has neither Exit nor Guard flag. Similar to
228 * case 2, this relay *could* have a weird exit policy and be
229 * picked in the exit position. Same rationale applies, so Wme
230 * is set to 0. The relay cannot be a guard, so Wgm is 0, too.
231 * For middle position, apply bandwidth weight Wmm. */
236 exitWeightAdvBw
= 0.0;
239 /* Store calculated weights and update totals. */
240 advertisedBandwidths
.put(fingerprint
, advertisedBandwidth
);
241 consensusWeights
.put(fingerprint
, consensusWeight
);
242 guardWeights
.put(fingerprint
, guardWeight
);
243 middleWeights
.put(fingerprint
, middleWeight
);
244 exitWeights
.put(fingerprint
, exitWeight
);
245 exitWeightsAdvBw
.put(fingerprint
, exitWeightAdvBw
);
246 totalAdvertisedBandwidth
+= advertisedBandwidth
;
247 totalConsensusWeight
+= consensusWeight
;
248 totalGuardWeight
+= guardWeight
;
249 totalMiddleWeight
+= middleWeight
;
250 totalExitWeight
+= exitWeight
;
251 totalExitWeightAdvBw
+= exitWeightAdvBw
;
254 /* Write calculated path-selection probabilities to the output
257 for (NetworkStatusEntry relay
:
258 consensus
.getStatusEntries().values()) {
259 String fingerprint
= relay
.getFingerprint();
260 bw
.write(String
.format("%s,%s,%.9f,%.9f,%.9f,%.9f,%.9f,"
264 advertisedBandwidths
.get(fingerprint
)
265 / totalAdvertisedBandwidth
,
266 consensusWeights
.get(fingerprint
) / totalConsensusWeight
,
267 guardWeights
.get(fingerprint
) / totalGuardWeight
,
268 middleWeights
.get(fingerprint
) / totalMiddleWeight
,
269 exitWeights
.get(fingerprint
) / totalExitWeight
,
270 exitWeightsAdvBw
.get(fingerprint
)
271 / totalExitWeightAdvBw
));
275 /* Write exit probabilities for top-x relays to the second and
276 * third output files. */
277 String
[] weightTypes
= new String
[] { "consensus weights",
278 "advertised bandwidth" };
279 Double
[][] sortedExitWeightsArray
= new Double
[][] {
280 exitWeights
.values().toArray(new Double
[exitWeights
.size()]),
281 exitWeightsAdvBw
.values().toArray(
282 new Double
[exitWeightsAdvBw
.size()]) };
283 double[] totalWeights
= new double[] { totalExitWeight
,
284 totalExitWeightAdvBw
};
285 for (int i
= 0; i
< weightTypes
.length
; i
++) {
286 String weightType
= weightTypes
[i
];
287 Double
[] sortedExitWeights
= sortedExitWeightsArray
[i
];
288 double totalWeight
= totalWeights
[i
];
289 Arrays
.sort(sortedExitWeights
);
291 double totalExitProbability
= 0.0;
292 List
<Double
> inverseProbabilities
= new ArrayList
<Double
>(
293 Arrays
.asList(new Double
[] { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
295 for (int j
= sortedExitWeights
.length
- 1; j
>= 0; j
--) {
296 double exitWeight
= sortedExitWeights
[j
];
298 totalExitProbability
+= exitWeight
/ totalWeight
;
299 if (topRelays
<= 50) {
300 bw2
.write(String
.format("%s,%s,%d,%.9f%n", validAfter
,
301 weightType
, topRelays
, totalExitProbability
));
303 while (!inverseProbabilities
.isEmpty() &&
304 totalExitProbability
> inverseProbabilities
.get(0)) {
305 bw3
.write(String
.format("%s,%s,%.1f,%d%n", validAfter
,
306 weightType
, inverseProbabilities
.remove(0), topRelays
));
308 if (inverseProbabilities
.isEmpty() && topRelays
> 50) {