Update criteria for partial/full IPv6 support.
[tor-metrics-tasks.git] / task-2394 / ParseDescriptors.java
blobfbbc4b5402be226c6033c86df02f8f23bd5fae3c
1 import java.io.*;
2 import java.util.*;
3 import org.apache.commons.codec.binary.*;
5 public class ParseDescriptors {
6 public static void main(String[] args) throws IOException {
8 /* Find all files that we should parse and distinguish between
9 * consensuses, votes, and server descriptors. */
10 SortedMap<String, File> consensuses = new TreeMap<String, File>();
11 SortedMap<String, File> descriptors = new TreeMap<String, File>();
12 SortedMap<String, File> votes = new TreeMap<String, File>();
13 Stack<File> files = new Stack<File>();
14 files.add(new File("descriptors"));
15 while (!files.isEmpty()) {
16 File file = files.pop();
17 String filename = file.getName();
18 if (file.isDirectory()) {
19 files.addAll(Arrays.asList(file.listFiles()));
20 } else if (filename.endsWith("-consensus")) {
21 consensuses.put(filename, file);
22 } else if (filename.endsWith("-votes")) {
23 votes.put(filename, file);
24 } else if (filename.endsWith("-serverdesc")) {
25 descriptors.put(filename, file);
28 System.out.println("We found " + consensuses.size()
29 + " consensus files, " + votes.size() + " vote files, and "
30 + descriptors.size() + " server descriptor files.");
32 /* Parse consensuses in an outer loop and the referenced votes and
33 * descriptors in inner loops. Write the results to disk as soon as
34 * we can to avoid keeping many things in memory. */
35 SortedMap<String, String> bandwidthAuthorities =
36 new TreeMap<String, String>();
37 bandwidthAuthorities.put("27B6B5996C426270A5C95488AA5BCEB6BCC86956",
38 "ides");
39 bandwidthAuthorities.put("80550987E1D626E3EBA5E5E75A458DE0626D088C",
40 "urras");
41 bandwidthAuthorities.put("D586D18309DED4CD6D57C18FDB97EFA96D330566",
42 "moria1");
43 bandwidthAuthorities.put("ED03BB616EB2F60BEC80151114BB25CEF515B226",
44 "gabelmoo");
45 bandwidthAuthorities.put("49015F787433103580E3B66A1707A00E60F2D15B",
46 "maatuska");
47 BufferedWriter bw = new BufferedWriter(new FileWriter(
48 "bandwidth-comparison.csv"));
49 bw.write("validafter,fingerprint,nickname,category,"
50 + "descriptorbandwidth,consensusbandwidth");
51 for (String bandwidthAuthority : bandwidthAuthorities.values()) {
52 bw.write("," + bandwidthAuthority + "bandwidth");
54 bw.write("\n");
55 for (File consensusFile : consensuses.values()) {
56 System.out.println("Parsing consensus " + consensusFile.getName());
57 BufferedReader brC = new BufferedReader(new FileReader(
58 consensusFile));
59 String lastRLine = null, lastSLine = null;
60 String consensusTimestamp = consensusFile.getName().substring(0,
61 "YYYY-MM-DD-hh-mm-ss".length());
62 Map<String, Map<String, String>> measuredBandwidthsByDirSource =
63 new HashMap<String, Map<String, String>>();
65 /* Parse votes first, if we have them, and extract measured
66 * bandwidths. */
67 String votesFilename = consensusTimestamp + "-votes";
68 if (votes.containsKey(votesFilename)) {
69 BufferedReader brV = new BufferedReader(new FileReader(
70 votes.get(votesFilename)));
71 String lineV;
72 Map<String, String> measuredBandwidths = null;
73 while ((lineV = brV.readLine()) != null) {
74 if (lineV.startsWith("dir-source ")) {
75 String dirSource = lineV.split(" ")[2];
76 measuredBandwidths = new HashMap<String, String>();
77 measuredBandwidthsByDirSource.put(dirSource,
78 measuredBandwidths);
79 } else if (lineV.startsWith("r ")) {
80 lastRLine = lineV;
81 } else if (lineV.startsWith("w ") &&
82 lineV.contains(" Measured=")) {
83 String fingerprint = Hex.encodeHexString(Base64.
84 decodeBase64(lastRLine.split(" ")[2] + "="));
85 String measuredBandwidth = lineV.substring(lineV.indexOf(
86 " Measured=") + " Measured=".length()).split(" ")[0];
87 measuredBandwidths.put(fingerprint, measuredBandwidth);
90 brV.close();
93 /* Parse referenced server descriptors to learn about exit policies
94 * and reported bandwidths. */
95 String descriptorsFilename = consensusTimestamp + "-serverdesc";
96 Map<String, String> parsedDescriptors =
97 new HashMap<String, String>();
98 if (descriptors.containsKey(descriptorsFilename)) {
99 BufferedReader brD = new BufferedReader(new FileReader(
100 descriptors.get(descriptorsFilename)));
101 Set<String> defaultRejects = new HashSet<String>();
102 /* Starting with 0.2.1.6-alpha, ports 465 and 587 were allowed
103 * in the default exit policy again (and therefore removed
104 * from the default reject lines). */
105 Set<String> optionalRejects = new HashSet<String>();
106 String lineD, address = null, fingerprint = null,
107 descriptorBandwidth = null;
108 boolean defaultPolicy = false, comparePolicies = true;
109 while ((lineD = brD.readLine()) != null) {
110 if (lineD.startsWith("router ")) {
111 address = lineD.split(" ")[2];
112 defaultRejects.clear();
113 defaultRejects.addAll(Arrays.asList(("0.0.0.0/8:*,"
114 + "169.254.0.0/16:*,127.0.0.0/8:*,192.168.0.0/16:*,"
115 + "10.0.0.0/8:*,172.16.0.0/12:*,$IP:*,*:25,*:119,"
116 + "*:135-139,*:445,*:563,*:1214,*:4661-4666,*:6346-6429,"
117 + "*:6699,*:6881-6999").split(",")));
118 optionalRejects.clear();
119 optionalRejects.addAll(Arrays.asList(
120 "*:465,*:587".split(",")));
121 fingerprint = null;
122 descriptorBandwidth = null;
123 defaultPolicy = false;
124 comparePolicies = true;
125 } else if (lineD.startsWith("opt fingerprint ") ||
126 lineD.startsWith("fingerprint ")) {
127 fingerprint = lineD.substring(lineD.startsWith("opt ") ?
128 "opt fingerprint".length() : "fingerprint".length()).
129 replaceAll(" ", "").toLowerCase();
130 } else if (lineD.startsWith("bandwidth ")) {
131 descriptorBandwidth = lineD.split(" ")[3];
132 } else if (lineD.startsWith("reject ") && comparePolicies) {
133 String rejectPattern = lineD.substring("reject ".
134 length());
135 if (defaultRejects.contains(rejectPattern)) {
136 defaultRejects.remove(rejectPattern);
137 } else if (optionalRejects.contains(rejectPattern)) {
138 optionalRejects.remove(rejectPattern);
139 } else if (rejectPattern.equals(address + ":*")) {
140 defaultRejects.remove("$IP:*");
141 } else {
142 comparePolicies = false;
144 } else if (lineD.startsWith("accept ") && comparePolicies) {
145 if (defaultRejects.isEmpty() &&
146 lineD.equals("accept *:*")) {
147 defaultPolicy = true;
149 comparePolicies = false;
150 } else if (lineD.equals("router-signature")) {
151 if (address != null && fingerprint != null &&
152 descriptorBandwidth != null) {
153 parsedDescriptors.put(fingerprint, descriptorBandwidth + ","
154 + (defaultPolicy ? "1" : "0"));
158 brD.close();
161 /* Parse r, s, and w lines from the consensus. */
162 String lineC, validAfter = null;
163 while ((lineC = brC.readLine()) != null) {
164 if (lineC.startsWith("valid-after ")) {
165 validAfter = lineC.substring("valid-after ".length());
166 } else if (lineC.startsWith("r ")) {
167 lastRLine = lineC;
168 } else if (lineC.startsWith("s ")) {
169 lastSLine = lineC;
170 } else if (lineC.startsWith("w ")) {
171 String[] parts = lastRLine.split(" ");
172 String nickname = parts[1];
173 String fingerprint = Hex.encodeHexString(Base64.decodeBase64(
174 parts[2] + "="));
175 String descriptor = Hex.encodeHexString(Base64.decodeBase64(
176 parts[3] + "="));
177 boolean exitFlag = lastSLine.contains(" Exit");
178 boolean guardFlag = lastSLine.contains(" Guard");
179 String consensusBandwidth = lineC.substring(lineC.indexOf(
180 " Bandwidth=") + " Bandwidth=".length()).split(" ")[0];
182 /* Look up whether we parsed this descriptor before. */
183 boolean parsedDescriptor = false, defaultPolicy = false;
184 String descriptorBandwidth = null;
185 if (parsedDescriptors.containsKey(fingerprint)) {
186 String parseResults = parsedDescriptors.get(fingerprint);
187 parsedDescriptor = true;
188 defaultPolicy = parseResults.endsWith("1");
189 descriptorBandwidth = parseResults.split(",")[0];
192 /* Write everything we know about this relay to disk. */
193 String category = null;
194 if (guardFlag && exitFlag && defaultPolicy) {
195 category = "Guard & Exit (default policy)";
196 } else if (!guardFlag && exitFlag && defaultPolicy) {
197 category = "Exit (default policy)";
198 } else if (guardFlag && exitFlag && !defaultPolicy) {
199 category = "Guard & Exit (non-default policy)";
200 } else if (!guardFlag && exitFlag && !defaultPolicy) {
201 category = "Exit (non-default policy)";
202 } else if (guardFlag && !exitFlag) {
203 category = "Guard";
204 } else if (!guardFlag && !exitFlag) {
205 category = "Middle";
207 bw.write(validAfter + "," + fingerprint + "," + nickname + ","
208 + category + "," + (parsedDescriptor ? descriptorBandwidth
209 : "NA") + "," + consensusBandwidth);
210 for (String bandwidthAuthority :
211 bandwidthAuthorities.keySet()) {
212 if (measuredBandwidthsByDirSource.containsKey(
213 bandwidthAuthority) && measuredBandwidthsByDirSource.get(
214 bandwidthAuthority).containsKey(fingerprint)) {
215 bw.write("," + measuredBandwidthsByDirSource.get(
216 bandwidthAuthority).get(fingerprint));
217 } else {
218 bw.write(",NA");
221 bw.write("\n");
224 brC.close();
226 bw.close();