Add graphing code for bandwidth by version (13634).
[tor-metrics-tasks.git] / task-3261 / ExtractDescriptorParts.java
blob544022de3f101fa5f19b41afff438461f6c970c6
1 import java.io.BufferedWriter;
2 import java.io.File;
3 import java.io.FileWriter;
4 import java.text.SimpleDateFormat;
5 import java.util.Iterator;
6 import java.util.SortedSet;
7 import java.util.TimeZone;
8 import java.util.TreeSet;
10 import org.apache.commons.codec.binary.Hex;
11 import org.apache.commons.codec.digest.DigestUtils;
12 import org.torproject.descriptor.BridgeNetworkStatus;
13 import org.torproject.descriptor.Descriptor;
14 import org.torproject.descriptor.DescriptorFile;
15 import org.torproject.descriptor.DescriptorReader;
16 import org.torproject.descriptor.DescriptorSourceFactory;
17 import org.torproject.descriptor.ExtraInfoDescriptor;
18 import org.torproject.descriptor.NetworkStatusEntry;
19 import org.torproject.descriptor.RelayNetworkStatusConsensus;
20 import org.torproject.descriptor.ServerDescriptor;
22 /* Extract the relevant parts from bridge descriptors and consensuses that
23 * are required to answer what fraction of bridges are not reporting
24 * bridge usage statistics. */
25 public class ExtractDescriptorParts {
26 public static void main(String[] args) throws Exception {
28 /* Define paths: we parse descriptor (tarballs) from in/, store the
29 * parse history to parse-history, write relevant parts per bridge to
30 * temp/, and write publication times of bridge network statuses to
31 * bridge-network-statuses. */
32 File inDirectory = new File("in");
33 File parseHistoryFile = new File("parse-history");
34 File tempDirectory = new File("temp");
35 File statusFile = new File("bridge-network-statuses");
37 /* Read descriptors. */
38 SimpleDateFormat dateTimeFormat =
39 new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
40 dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
41 DescriptorReader reader =
42 DescriptorSourceFactory.createDescriptorReader();
43 reader.addDirectory(inDirectory);
44 reader.setExcludeFiles(parseHistoryFile);
45 Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
46 while (descriptorFiles.hasNext()) {
47 DescriptorFile descriptorFile = descriptorFiles.next();
48 if (descriptorFile.getDescriptors() != null) {
49 for (Descriptor descriptor : descriptorFile.getDescriptors()) {
51 /* Extract bridge-stats and geoip-stats from bridge extra-info
52 * descriptors. */
53 if (descriptor instanceof ExtraInfoDescriptor) {
54 System.out.print("e");
55 SortedSet<String> lines = new TreeSet<String>();
56 ExtraInfoDescriptor extraInfoDescriptor =
57 (ExtraInfoDescriptor) descriptor;
58 if (extraInfoDescriptor.getBridgeStatsEndMillis() > 0) {
59 lines.add("bridge-stats " + dateTimeFormat.format(
60 extraInfoDescriptor.getBridgeStatsEndMillis()) + " "
61 + extraInfoDescriptor.getBridgeStatsIntervalLength()
62 + " " + (extraInfoDescriptor.getGeoipDbDigest() == null
63 ? "NA" : extraInfoDescriptor.getGeoipDbDigest()));
65 if (extraInfoDescriptor.getGeoipStartTimeMillis() > 0) {
66 long intervalLength =
67 (extraInfoDescriptor.getPublishedMillis()
68 - extraInfoDescriptor.getGeoipStartTimeMillis())
69 / 1000L;
70 String geoipStatsEnd = dateTimeFormat.format(
71 extraInfoDescriptor.getPublishedMillis());
72 lines.add("geoip-stats " + geoipStatsEnd + " "
73 + intervalLength + " "
74 + (extraInfoDescriptor.getGeoipDbDigest() == null
75 ? "NA" : extraInfoDescriptor.getGeoipDbDigest()));
77 if (!lines.isEmpty()) {
78 File outputFile = new File(tempDirectory,
79 extraInfoDescriptor.getFingerprint().toUpperCase());
80 outputFile.getParentFile().mkdirs();
81 BufferedWriter bw = new BufferedWriter(new FileWriter(
82 outputFile, true));
83 for (String l : lines) {
84 bw.write(l + "\n");
86 bw.close();
89 /* Extract all bridges with the Running flag from bridge network
90 * statuses. Also extract the status publication time. */
91 } else if (descriptor instanceof BridgeNetworkStatus) {
92 System.out.print("n");
93 BridgeNetworkStatus status = (BridgeNetworkStatus) descriptor;
94 String published = dateTimeFormat.format(
95 status.getPublishedMillis());
96 if (status.getStatusEntries() != null) {
97 for (NetworkStatusEntry entry :
98 status.getStatusEntries().values()) {
99 if (entry.getFlags().contains("Running")) {
100 File outputFile = new File(tempDirectory,
101 entry.getFingerprint().toUpperCase());
102 outputFile.getParentFile().mkdirs();
103 BufferedWriter bw = new BufferedWriter(new FileWriter(
104 outputFile, true));
105 String digest = entry.getDescriptor().toUpperCase();
106 bw.write("running-bridge " + published + " " + digest
107 + "\n");
108 bw.close();
111 BufferedWriter bw = new BufferedWriter(new FileWriter(
112 statusFile, true));
113 bw.write(published + "\n");
114 bw.close();
117 /* Extract publication time, digest, uptime, and platform string
118 * from bridge server descriptors. */
119 } else if (descriptor instanceof ServerDescriptor) {
120 System.out.print("s");
121 ServerDescriptor serverDescriptor =
122 (ServerDescriptor) descriptor;
123 String published = dateTimeFormat.format(
124 serverDescriptor.getPublishedMillis());
125 String digest = descriptorFile.getFileName().substring(
126 descriptorFile.getFileName().lastIndexOf("/") + 1).
127 toUpperCase();
128 String uptime = serverDescriptor.getUptime() == null ? "-1"
129 : String.valueOf(serverDescriptor.getUptime());
130 String platform = serverDescriptor.getPlatform() == null
131 ? "NA" : serverDescriptor.getPlatform();
132 File outputFile = new File(tempDirectory,
133 serverDescriptor.getFingerprint().toUpperCase());
134 outputFile.getParentFile().mkdirs();
135 BufferedWriter bw = new BufferedWriter(new FileWriter(
136 outputFile, true));
137 bw.write("server-descriptor " + published + " "
138 + digest + " " + uptime + " " + platform + "\n");
139 bw.close();
141 /* Extract hashed fingerprints of all relays with the Running
142 * flag from relay network status consensuses. */
143 } else if (descriptor instanceof RelayNetworkStatusConsensus) {
144 System.out.print("r");
145 RelayNetworkStatusConsensus status =
146 (RelayNetworkStatusConsensus) descriptor;
147 if (status.getStatusEntries() != null) {
148 for (NetworkStatusEntry entry :
149 status.getStatusEntries().values()) {
150 if (entry.getFlags().contains("Running")) {
151 String hashedFingerprint = Hex.encodeHexString(
152 DigestUtils.sha(Hex.decodeHex(
153 entry.getFingerprint().toCharArray()))).
154 toUpperCase();
155 File outputFile = new File(tempDirectory,
156 hashedFingerprint);
157 outputFile.getParentFile().mkdirs();
158 BufferedWriter bw = new BufferedWriter(new FileWriter(
159 outputFile, true));
160 bw.write("running-relay " + dateTimeFormat.format(
161 status.getValidAfterMillis()) + "\n");
162 bw.close();