4 import org
.apache
.commons
.codec
.binary
.*;
6 public class ProcessSanitizedBridges
{
7 public static void main(String
[] args
) throws IOException
,
10 /* Validate command-line arguments. */
11 if (args
.length
!= 1 || !new File(args
[0]).exists()) {
12 System
.out
.println("Usage: java ProcessSanitizedBridges <dir>");
16 /* Find all files that we should parse. Somewhat fragile, but should
18 System
.out
.println("Creating list of files we should parse.");
19 SortedMap
<String
, File
> statuses
= new TreeMap
<String
, File
>();
20 SortedMap
<String
, File
> serverDescriptors
=
21 new TreeMap
<String
, File
>();
22 SortedMap
<String
, File
> extraInfoDescriptors
=
23 new TreeMap
<String
, File
>();
24 Stack
<File
> files
= new Stack
<File
>();
25 files
.add(new File(args
[0]));
26 while (!files
.isEmpty()) {
27 File file
= files
.pop();
28 String path
= file
.getAbsolutePath();
29 String filename
= file
.getName();
30 if (file
.isDirectory()) {
31 files
.addAll(Arrays
.asList(file
.listFiles()));
32 } else if (path
.contains("statuses")) {
33 statuses
.put(filename
, file
);
34 } else if (path
.contains("server-descriptors")) {
35 serverDescriptors
.put(filename
, file
);
36 } else if (path
.contains("extra-infos")) {
37 extraInfoDescriptors
.put(filename
, file
);
40 System
.out
.println("We found\n " + statuses
.size() + " statuses,\n "
41 + serverDescriptors
.size() + " server descriptors, and\n "
42 + extraInfoDescriptors
.size() + " extra-info descriptors.");
45 if (!statuses
.isEmpty()) {
46 System
.out
.println("Parsing statuses.");
47 List
<String
> knownFlags
= new ArrayList
<String
>(Arrays
.asList(
48 ("Authority,BadExit,BadDirectory,Exit,Fast,Guard,Named,Stable,"
49 + "Running,Valid,V2Dir").split(",")));
50 BufferedWriter bw
= new BufferedWriter(new FileWriter(
52 bw
.write("status,fingerprint,descriptor,published,address,orport,"
54 for (String knownFlag
: knownFlags
) {
55 bw
.write("," + knownFlag
.toLowerCase());
58 int parsedStatuses
= 0, totalStatuses
= statuses
.size(),
59 writtenOutputLines
= 1;
60 long started
= System
.currentTimeMillis();
61 for (File file
: statuses
.values()) {
62 String filename
= file
.getName();
63 if (filename
.length() != ("20110101-000703-"
64 + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D").length()) {
65 System
.out
.println("Status filename has wrong length: '"
66 + filename
+ "' Please check. Exiting.");
69 String statusDateTime
= filename
.substring(0, 4) + "-"
70 + filename
.substring(4, 6) + "-" + filename
.substring(6, 8)
71 + " " + filename
.substring(9, 11) + ":"
72 + filename
.substring(11, 13) + ":"
73 + filename
.substring(13, 15);
74 BufferedReader br
= new BufferedReader(new FileReader(file
));
76 while ((line
= br
.readLine()) != null) {
77 if (line
.startsWith("r ")) {
78 String
[] parts
= line
.split(" ");
79 if (parts
.length
!= 9) {
80 System
.out
.println("r line doesn't have the correct number "
81 + "of entries: '" + line
+ "'. Please check. Exiting.");
84 String fingerprint
= Hex
.encodeHexString(Base64
.decodeBase64(
86 String descriptor
= Hex
.encodeHexString(Base64
.decodeBase64(
88 String published
= parts
[4] + " " + parts
[5];
89 String address
= parts
[6];
90 String orPort
= parts
[7];
91 String dirPort
= parts
[8];
92 bw
.write(statusDateTime
+ "," + fingerprint
+ "," + descriptor
93 + "," + published
+ "," + address
+ "," + orPort
+ ","
95 } else if (line
.equals("s") || line
.startsWith("s ")) {
96 String flags
= line
.substring(1);
97 for (String flag
: knownFlags
) {
98 if (flags
.contains(" " + flag
)) {
105 writtenOutputLines
++;
110 if (parsedStatuses
% (totalStatuses
/ 10) == 0) {
111 double fractionDone
= (double) (parsedStatuses
) /
112 (double) totalStatuses
;
113 double fractionLeft
= 1.0D
- fractionDone
;
114 long now
= System
.currentTimeMillis();
115 double millisLeft
= ((double) (now
- started
)) * fractionLeft
/
117 long secondsLeft
= (long) millisLeft
/ 1000L;
118 System
.out
.println(" " + (parsedStatuses
/ (totalStatuses
119 / 10)) + "0% done, " + secondsLeft
+ " seconds left.");
123 System
.out
.println("Parsed " + parsedStatuses
+ " statuses and "
124 + "wrote " + writtenOutputLines
+ " lines to statuses.csv.");
127 /* Parse server descriptors and extra-info descriptors. */
128 if (!serverDescriptors
.isEmpty()) {
129 System
.out
.println("Parsing server descriptors and extra-info "
131 List
<String
> knownCountries
= new ArrayList
<String
>(Arrays
.asList(
132 ("?? A1 A2 AD AE AF AG AI AL AM AN AO AP AQ AR AS AT AU AW AX "
133 + "AZ BA BB BD BE BF BG BH BI BJ BM BN BO BR BS BT BV BW BY BZ "
134 + "CA CD CF CG CH CI CK CL CM CN CO CR CS CU CV CY CZ DE DJ DK "
135 + "DM DO DZ EC EE EG ER ES ET EU FI FJ FK FM FO FR GA GB GD GE "
136 + "GF GG GH GI GL GM GN GP GQ GR GT GU GW GY HK HN HR HT HU ID "
137 + "IE IL IM IN IO IQ IR IS IT JE JM JO JP KE KG KH KI KM KN KP "
138 + "KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD ME MF "
139 + "MG MH MK ML MM MN MO MP MQ MR MS MT MU MV MW MX MY MZ NA NC "
140 + "NE NF NG NI NL NO NP NR NU NZ OM PA PE PF PG PH PK PL PM PR "
141 + "PS PT PW PY QA RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK "
142 + "SL SM SN SO SR ST SV SY SZ TC TD TG TH TJ TK TL TM TN TO TR "
143 + "TT TV TW TZ UA UG UM US UY UZ VA VC VE VG VI VN VU WF WS YE "
144 + "YT ZA ZM ZW").toLowerCase().split(" ")));
145 BufferedWriter bw
= new BufferedWriter(new FileWriter(
147 bw
.write("descriptor,fingerprint,published,address,orport,dirport,"
148 + "version,platform,uptime,bridgestatsend,bridgestatsseconds");
149 for (String country
: knownCountries
) {
150 bw
.write("," + country
);
152 bw
.write(",bridgestatscountries,bridgestatstotal\n");
153 int parsedServerDescriptors
= 0, parsedExtraInfoDescriptors
= 0,
154 parsedGeoipStats
= 0, skippedGeoipStats
= 0,
155 parsedBridgeStats
= 0,
156 totalServerDescriptors
= serverDescriptors
.size(),
157 writtenOutputLines
= 1;
158 SimpleDateFormat timeFormat
= new SimpleDateFormat(
159 "yyyy-MM-dd HH:mm:ss");
160 timeFormat
.setTimeZone(TimeZone
.getTimeZone("UTC"));
161 long started
= System
.currentTimeMillis();
162 for (File file
: serverDescriptors
.values()) {
163 String filename
= file
.getName();
164 BufferedReader br
= new BufferedReader(new FileReader(file
));
165 String line
, fingerprint
= null, published
= null, address
= null,
166 orPort
= null, dirPort
= null, version
= null,
167 platform
= null, uptime
= null, extraInfoDigest
= null,
168 bridgeStatsEnd
= null, bridgeStatsSeconds
= null;
169 SortedMap
<String
, String
> bridgeStatsIps
=
170 new TreeMap
<String
, String
>();
171 long bridgeStatsTotal
= 0L;
172 while ((line
= br
.readLine()) != null) {
173 if (line
.startsWith("opt ")) {
174 line
= line
.substring(4);
176 if (line
.startsWith("router ")) {
177 String
[] parts
= line
.split(" ");
181 } else if (line
.startsWith("platform ")) {
182 version
= line
.split(" ")[2];
183 platform
= line
.substring(line
.indexOf("on ")
185 if (platform
.contains("Windows")) {
186 platform
= "Windows";
187 } else if (platform
.contains("Linux")) {
189 } else if (platform
.contains("Darwin")) {
190 platform
= "Mac OS X";
191 } else if (platform
.contains("BSD")) {
196 } else if (line
.startsWith("published ")) {
197 String
[] parts
= line
.split(" ");
198 published
= parts
[1] + " " + parts
[2];
199 } else if (line
.startsWith("fingerprint ")) {
200 fingerprint
= line
.substring("fingerprint".length()).
201 replaceAll(" ", "").toLowerCase();
202 } else if (line
.startsWith("uptime ")) {
203 uptime
= line
.split(" ")[1];
204 } else if (line
.startsWith("extra-info-digest ")) {
205 extraInfoDigest
= line
.substring("extra-info-digest ".
206 length()).toLowerCase();
207 if (extraInfoDescriptors
.containsKey(extraInfoDigest
)) {
208 parsedExtraInfoDescriptors
++;
209 BufferedReader br2
= new BufferedReader(new FileReader(
210 extraInfoDescriptors
.get(extraInfoDigest
)));
211 String geoipStartTime
= null, bridgeStatsEndLine
= null;
212 while ((line
= br2
.readLine()) != null) {
213 if (line
.startsWith("geoip-start-time ")) {
214 geoipStartTime
= line
.substring("geoip-start-time ".
216 } else if (line
.startsWith("geoip-client-origins ") &&
217 line
.split(" ").length
> 1 && published
!= null &&
218 geoipStartTime
!= null) {
219 if (version
.startsWith("0.2.2.")) {
223 bridgeStatsEnd
= published
;
224 bridgeStatsSeconds
= "" +
225 + (timeFormat
.parse(published
).getTime()
226 - timeFormat
.parse(geoipStartTime
).getTime())
228 for (String pair
: line
.split(" ")[1].split(",")) {
229 String country
= pair
.substring(0, 2);
230 String ips
= pair
.substring(3);
231 bridgeStatsIps
.put(country
, ips
);
232 bridgeStatsTotal
+= Long
.parseLong(ips
);
235 } else if (line
.startsWith("bridge-stats-end ")) {
236 bridgeStatsEndLine
= line
;
237 } else if (line
.startsWith("bridge-ips ") &&
238 line
.length() > "bridge-ips ".length() &&
239 bridgeStatsEndLine
!= null) {
241 String
[] parts
= bridgeStatsEndLine
.split(" ");
242 bridgeStatsEnd
= parts
[1] + " " + parts
[2];
243 bridgeStatsSeconds
= parts
[3].substring(1);
244 for (String pair
: line
.split(" ")[1].split(",")) {
245 String country
= pair
.substring(0, 2);
246 String ips
= pair
.substring(3);
247 bridgeStatsIps
.put(country
, ips
);
248 bridgeStatsTotal
+= Long
.parseLong(ips
);
257 if (fingerprint
== null || published
== null || address
== null ||
258 orPort
== null || dirPort
== null || version
== null ||
259 platform
== null || uptime
== null) {
260 System
.out
.println("Server descriptor " + filename
+ " is "
261 + "missing critical information. Please check. Exiting.");
264 bw
.write(filename
+ "," + fingerprint
+ "," + published
+ ","
265 + address
+ "," + orPort
+ "," + dirPort
+ "," + version
+ ","
266 + platform
+ "," + uptime
);
267 if (bridgeStatsEnd
!= null) {
268 bw
.write("," + bridgeStatsEnd
+ "," + bridgeStatsSeconds
);
269 int bridgeStatsCountries
= bridgeStatsIps
.size();
270 for (String country
: knownCountries
) {
271 if (bridgeStatsIps
.containsKey(country
)) {
272 bw
.write("," + bridgeStatsIps
.remove(country
));
277 if (!bridgeStatsIps
.isEmpty()) {
278 StringBuilder message
= new StringBuilder();
279 for (String country
: bridgeStatsIps
.keySet()) {
280 message
.append(", " + country
);
282 System
.out
.println("Unknown " + (bridgeStatsIps
.size() == 1 ?
283 "country" : "countries") + " " + message
.toString().
284 substring(2) + " in extra-info descriptor "
285 + extraInfoDigest
+ ". Please check. Exiting.");
288 bw
.write("," + bridgeStatsCountries
+ "," + bridgeStatsTotal
292 for (String country
: knownCountries
) {
295 bw
.write(",NA,NA\n");
297 writtenOutputLines
++;
298 parsedServerDescriptors
++;
299 if (parsedServerDescriptors
% (totalServerDescriptors
/ 100)
301 double fractionDone
= (double) (parsedServerDescriptors
) /
302 (double) totalServerDescriptors
;
303 double fractionLeft
= 1.0D
- fractionDone
;
304 long now
= System
.currentTimeMillis();
305 double millisLeft
= ((double) (now
- started
)) * fractionLeft
/
307 long secondsLeft
= (long) millisLeft
/ 1000L;
308 System
.out
.println(" " + (parsedServerDescriptors
/
309 (totalServerDescriptors
/ 100)) + "% done, " + secondsLeft
314 System
.out
.println("Parsed " + parsedServerDescriptors
+ " server "
315 + "descriptors and " + parsedExtraInfoDescriptors
316 + " extra-info descriptors.\nParsed " + parsedGeoipStats
317 + " geoip-stats and " + parsedBridgeStats
+ " bridge-stats.\n"
318 + "Skipped " + skippedGeoipStats
+ " broken geoip-stats of "
319 + "0.2.2.x bridges.\nWrote " + writtenOutputLines
+ " to "
320 + "descriptors.csv.");
324 System
.out
.println("Terminating.");