Add exit probability based on advertised bw (#6443).
[tor-metrics-tasks/delber.git] / task-6443 / src / CalculatePathSelectionProbabilities.java
blobf819d3ea6d0ad7ea6e6b47f378806a1b5c4911c9
1 import java.io.BufferedWriter;
2 import java.io.File;
3 import java.io.FileWriter;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.HashMap;
8 import java.util.Iterator;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.SortedMap;
12 import java.util.SortedSet;
13 import java.util.TimeZone;
14 import java.util.TreeMap;
15 import java.util.TreeSet;
17 import org.torproject.descriptor.Descriptor;
18 import org.torproject.descriptor.DescriptorFile;
19 import org.torproject.descriptor.DescriptorReader;
20 import org.torproject.descriptor.DescriptorSourceFactory;
21 import org.torproject.descriptor.NetworkStatusEntry;
22 import org.torproject.descriptor.RelayNetworkStatusConsensus;
23 import org.torproject.descriptor.ServerDescriptor;
26 * Calculate path-selection probabilities for relays based on consensus
27 * weights and advertised bandwidths:
29 * - advertised_bandwidth_fraction: Relative advertised bandwidth of
30 * this relay compared to the total advertised bandwidth in the
31 * network. If there were no bandwidth authorities, this fraction
32 * would be the probability of this relay to be selected by clients.
34 * - consensus_weight_fraction: Fraction of this relay's consensus weight
35 * compared to the sum of all consensus weights in the network. This
36 * fraction is a very rough approximation of the probability of this
37 * relay to be selected by clients.
39 * - guard_probability: Probability of this relay to be selected for the
40 * guard position. This probability is calculated based on consensus
41 * weights, relay flags, directory ports, and bandwidth weights in the
42 * consensus. Path selection depends on more factors, so that this
43 * probability can only be an approximation.
45 * - middle_probability: Probability of this relay to be selected for the
46 * middle position. This probability is calculated based on consensus
47 * weights, relay flags, directory ports, and bandwidth weights in the
48 * consensus. Path selection depends on more factors, so that this
49 * probability can only be an approximation.
51 * - exit_probability: Probability of this relay to be selected for the
52 * exit position. This probability is calculated based on consensus
53 * weights, relay flags, directory ports, and bandwidth weights in the
54 * consensus. Path selection depends on more factors, so that this
55 * probability can only be an approximation.
57 * - exit_probability_advbw: Probability of this relay to be selected for
58 * the exit position under the assumption that clients would use
59 * advertised bandwidth for path selection.
61 public class CalculatePathSelectionProbabilities {
62 public static void main(String[] args) throws Exception {
64 /* Note: change to true if raw weights shall be written to disk. */
65 boolean writeRawWeights = false;
67 /* Read advertised bandwidths of all server descriptors in
68 * in/server-descriptors/ to memory. This is a rather brute-force
69 * approach, but it's fine for running this analysis. */
70 DescriptorReader descriptorReader =
71 DescriptorSourceFactory.createDescriptorReader();
72 descriptorReader.addDirectory(new File("in/server-descriptors"));
73 Iterator<DescriptorFile> descriptorFiles =
74 descriptorReader.readDescriptors();
75 Map<String, Integer> serverDescriptors =
76 new HashMap<String, Integer>();
77 while (descriptorFiles.hasNext()) {
78 DescriptorFile descriptorFile = descriptorFiles.next();
79 for (Descriptor descriptor : descriptorFile.getDescriptors()) {
80 if (!(descriptor instanceof ServerDescriptor)) {
81 continue;
83 ServerDescriptor serverDescriptor = (ServerDescriptor) descriptor;
84 String digest = serverDescriptor.getServerDescriptorDigest();
85 int advertisedBandwidth = Math.min(Math.min(
86 serverDescriptor.getBandwidthBurst(),
87 serverDescriptor.getBandwidthObserved()),
88 serverDescriptor.getBandwidthRate());
89 serverDescriptors.put(digest.toUpperCase(), advertisedBandwidth);
93 /* Go through consensuses in in/consensuses/ in arbitrary order and
94 * calculate the five path-selection probabilities for each of them.
95 * Write results for a given consensuses to a single new line per
96 * relay to out.csv. */
97 descriptorReader = DescriptorSourceFactory.createDescriptorReader();
98 descriptorReader.addDirectory(new File("in/consensuses"));
99 descriptorFiles = descriptorReader.readDescriptors();
100 BufferedWriter bw = null;
101 if (writeRawWeights) {
102 bw = new BufferedWriter(new FileWriter("weights.csv"));
103 bw.write("validafter,fingerprint,advertised_bandwidth_fraction,"
104 + "consensus_weight_fraction,guard_probability,"
105 + "middle_probability,exit_probability,"
106 + "exit_probability_advbw\n");
108 BufferedWriter bw2 = new BufferedWriter(new FileWriter(
109 "cumulated-weights.csv"));
110 bw2.write("validafter,weight_type,top_relays,"
111 + "total_exit_probability\n");
112 BufferedWriter bw3 = new BufferedWriter(new FileWriter(
113 "inverse-cumulated-weights.csv"));
114 bw3.write("validafter,weight_type,total_exit_probability,"
115 + "top_relays\n");
116 while (descriptorFiles.hasNext()) {
117 DescriptorFile descriptorFile = descriptorFiles.next();
118 for (Descriptor descriptor : descriptorFile.getDescriptors()) {
119 if (!(descriptor instanceof RelayNetworkStatusConsensus)) {
120 continue;
122 RelayNetworkStatusConsensus consensus =
123 (RelayNetworkStatusConsensus) descriptor;
125 /* Extract valid-after time and bandwidth weights from the parsed
126 * consensus. */
127 SimpleDateFormat dateTimeFormat = new SimpleDateFormat(
128 "yyyy-MM-dd HH:mm:ss");
129 dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
130 String validAfter = dateTimeFormat.format(
131 consensus.getValidAfterMillis());
132 SortedMap<String, Integer> bandwidthWeights =
133 consensus.getBandwidthWeights();
134 if (bandwidthWeights == null) {
135 /* Consensus doesn't contain any bandwidth weights. */
136 continue;
138 SortedSet<String> weightKeys = new TreeSet<String>(Arrays.asList((
139 "Wgg,Wgm,Wgd,Wmg,Wmm,Wme,Wmd,Weg,Wem,Wee,Wed,Wgb,Wmb,Web,Wdb,"
140 + "Wbg,Wbm,Wbe,Wbd").split(",")));
141 weightKeys.removeAll(bandwidthWeights.keySet());
142 if (!weightKeys.isEmpty()) {
143 /* Consensus is missing at least some required bandwidth
144 * weights. */
145 continue;
147 double wgg = ((double) bandwidthWeights.get("Wgg")) / 10000.0,
148 wgd = ((double) bandwidthWeights.get("Wgd")) / 10000.0,
149 wmg = ((double) bandwidthWeights.get("Wmg")) / 10000.0,
150 wmm = ((double) bandwidthWeights.get("Wmm")) / 10000.0,
151 wme = ((double) bandwidthWeights.get("Wme")) / 10000.0,
152 wmd = ((double) bandwidthWeights.get("Wmd")) / 10000.0,
153 wee = ((double) bandwidthWeights.get("Wee")) / 10000.0,
154 wed = ((double) bandwidthWeights.get("Wed")) / 10000.0;
156 /* Go through network statuses and calculate the five weights for
157 * each of them. Also sum up totals to calculate probabilities
158 * later. */
159 SortedMap<String, Double>
160 advertisedBandwidths = new TreeMap<String, Double>(),
161 consensusWeights = new TreeMap<String, Double>(),
162 guardWeights = new TreeMap<String, Double>(),
163 middleWeights = new TreeMap<String, Double>(),
164 exitWeights = new TreeMap<String, Double>(),
165 exitWeightsAdvBw = new TreeMap<String, Double>();
166 double totalAdvertisedBandwidth = 0.0;
167 double totalConsensusWeight = 0.0;
168 double totalGuardWeight = 0.0;
169 double totalMiddleWeight = 0.0;
170 double totalExitWeight = 0.0;
171 double totalExitWeightAdvBw = 0.0;
172 for (NetworkStatusEntry relay :
173 consensus.getStatusEntries().values()) {
174 String fingerprint = relay.getFingerprint();
175 if (!relay.getFlags().contains("Running")) {
176 continue;
178 boolean isExit = relay.getFlags().contains("Exit") &&
179 !relay.getFlags().contains("BadExit");
180 boolean isGuard = relay.getFlags().contains("Guard");
181 String serverDescriptorDigest = relay.getDescriptor().
182 toUpperCase();
183 double advertisedBandwidth;
184 if (!serverDescriptors.containsKey(serverDescriptorDigest)) {
185 advertisedBandwidth = 0.0;
186 } else {
187 advertisedBandwidth = (double) serverDescriptors.get(
188 serverDescriptorDigest);
190 double consensusWeight = (double) relay.getBandwidth();
191 double guardWeight = (double) relay.getBandwidth();
192 double middleWeight = (double) relay.getBandwidth();
193 double exitWeight = (double) relay.getBandwidth();
194 double exitWeightAdvBw = advertisedBandwidth;
196 /* Case 1: relay has both Guard and Exit flag and could be
197 * selected for either guard, middle, or exit position. Apply
198 * bandwidth weights W?d. */
199 if (isGuard && isExit) {
200 guardWeight *= wgd;
201 middleWeight *= wmd;
202 exitWeight *= wed;
203 exitWeightAdvBw *= wed;
205 /* Case 2: relay only has the Guard flag, not the Exit flag.
206 * While, in theory, the relay could also be picked for the exit
207 * position (if it has a weird exit policy), Weg is hard-coded
208 * to 0 here. Otherwise, relays with exit policy reject *:*
209 * would show up as possible exits, which makes no sense. Maybe
210 * this is too much of an oversimplification? For the other
211 * positions, apply bandwidth weights W?g. */
212 } else if (isGuard) {
213 guardWeight *= wgg;
214 middleWeight *= wmg;
215 exitWeight = 0.0;
216 exitWeightAdvBw = 0.0;
218 /* Case 3: relay only has the Exit flag, not the Guard flag. It
219 * cannot be picked for the guard position, so set Wge to 0.
220 * For the other positions, apply bandwidth weights W?e. */
221 } else if (isExit) {
222 guardWeight = 0.0;
223 middleWeight *= wme;
224 exitWeight *= wee;
225 exitWeightAdvBw *= wee;
227 /* Case 4: relay has neither Exit nor Guard flag. Similar to
228 * case 2, this relay *could* have a weird exit policy and be
229 * picked in the exit position. Same rationale applies, so Wme
230 * is set to 0. The relay cannot be a guard, so Wgm is 0, too.
231 * For middle position, apply bandwidth weight Wmm. */
232 } else {
233 guardWeight = 0.0;
234 middleWeight *= wmm;
235 exitWeight = 0.0;
236 exitWeightAdvBw = 0.0;
239 /* Store calculated weights and update totals. */
240 advertisedBandwidths.put(fingerprint, advertisedBandwidth);
241 consensusWeights.put(fingerprint, consensusWeight);
242 guardWeights.put(fingerprint, guardWeight);
243 middleWeights.put(fingerprint, middleWeight);
244 exitWeights.put(fingerprint, exitWeight);
245 exitWeightsAdvBw.put(fingerprint, exitWeightAdvBw);
246 totalAdvertisedBandwidth += advertisedBandwidth;
247 totalConsensusWeight += consensusWeight;
248 totalGuardWeight += guardWeight;
249 totalMiddleWeight += middleWeight;
250 totalExitWeight += exitWeight;
251 totalExitWeightAdvBw += exitWeightAdvBw;
254 /* Write calculated path-selection probabilities to the output
255 * file. */
256 if (bw != null) {
257 for (NetworkStatusEntry relay :
258 consensus.getStatusEntries().values()) {
259 String fingerprint = relay.getFingerprint();
260 bw.write(String.format("%s,%s,%.9f,%.9f,%.9f,%.9f,%.9f,"
261 + "%.9f%n",
262 validAfter,
263 fingerprint,
264 advertisedBandwidths.get(fingerprint)
265 / totalAdvertisedBandwidth,
266 consensusWeights.get(fingerprint) / totalConsensusWeight,
267 guardWeights.get(fingerprint) / totalGuardWeight,
268 middleWeights.get(fingerprint) / totalMiddleWeight,
269 exitWeights.get(fingerprint) / totalExitWeight,
270 exitWeightsAdvBw.get(fingerprint)
271 / totalExitWeightAdvBw));
275 /* Write exit probabilities for top-x relays to the second and
276 * third output files. */
277 String[] weightTypes = new String[] { "consensus weights",
278 "advertised bandwidth" };
279 Double[][] sortedExitWeightsArray = new Double[][] {
280 exitWeights.values().toArray(new Double[exitWeights.size()]),
281 exitWeightsAdvBw.values().toArray(
282 new Double[exitWeightsAdvBw.size()]) };
283 double[] totalWeights = new double[] { totalExitWeight,
284 totalExitWeightAdvBw };
285 for (int i = 0; i < weightTypes.length; i++) {
286 String weightType = weightTypes[i];
287 Double[] sortedExitWeights = sortedExitWeightsArray[i];
288 double totalWeight = totalWeights[i];
289 Arrays.sort(sortedExitWeights);
290 int topRelays = 0;
291 double totalExitProbability = 0.0;
292 List<Double> inverseProbabilities = new ArrayList<Double>(
293 Arrays.asList(new Double[] { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6,
294 0.7, 0.8, 0.9 }));
295 for (int j = sortedExitWeights.length - 1; j >= 0; j--) {
296 double exitWeight = sortedExitWeights[j];
297 topRelays++;
298 totalExitProbability += exitWeight / totalWeight;
299 if (topRelays <= 50) {
300 bw2.write(String.format("%s,%s,%d,%.9f%n", validAfter,
301 weightType, topRelays, totalExitProbability));
303 while (!inverseProbabilities.isEmpty() &&
304 totalExitProbability > inverseProbabilities.get(0)) {
305 bw3.write(String.format("%s,%s,%.1f,%d%n", validAfter,
306 weightType, inverseProbabilities.remove(0), topRelays));
308 if (inverseProbabilities.isEmpty() && topRelays > 50) {
309 break;
315 if (bw != null) {
316 bw.close();
318 bw2.close();
319 bw3.close();