Add code for web logs analysis (#20008).
[tor-metrics-tasks.git] / task-1991 / Merge.java
blob 622cbf680fbf227cc57ee613869c5f09175ddcec
1 import java.io.*;
2 import java.text.*;
3 import java.util.*;
/**
 * Joins Torperf measurements with guard bandwidth data.
 *
 * <p>Step 1 reads {@code bandwidths-sql.csv}, whose rows are
 * {@code fingerprint,datetime,bandwidth}. Consecutive rows sharing the
 * same datetime form one group; inside each group relays are sorted by
 * bandwidth and assigned a rank between 0 (lowest) and 1 (highest).
 *
 * <p>Step 2 reads every {@code *.mergedata} file in the working directory
 * and, for each usable measurement, looks up the most recent bandwidth
 * entry of the first relay in the measurement's path. Matching rows are
 * written to {@code torperf-guard-bandwidths-ranks.csv}.
 */
public class Merge {

  private static final String BANDWIDTHS_FILE = "bandwidths-sql.csv";
  private static final String OUTPUT_FILE =
      "torperf-guard-bandwidths-ranks.csv";
  private static final String MERGEDATA_SUFFIX = ".mergedata";
  private static final String GUARDS_MARKER = "80cbt";

  /**
   * Orders raw {@code fingerprint,datetime,bandwidth} rows by ascending
   * bandwidth. Uses an explicit comparison instead of subtraction so that
   * large values cannot overflow and flip the sign.
   */
  private static final Comparator<String> BY_BANDWIDTH =
      new Comparator<String>() {
        public int compare(String a, String b) {
          long left = Long.parseLong(a.split(",")[2]);
          long right = Long.parseLong(b.split(",")[2]);
          return left < right ? -1 : (left > right ? 1 : 0);
        }
      };

  /**
   * Runs both steps: ranks the guard bandwidths, then merges them with all
   * {@code *.mergedata} files found in the working directory.
   *
   * @param args ignored
   * @throws Exception if an input file cannot be read, the output file
   *     cannot be written, or a numeric field cannot be parsed
   */
  public static void main(String[] args) throws Exception {

    System.out.println("Reading guard node bandwidths...");
    SortedMap<String, String> bandwidthRanks =
        readBandwidthRanks(BANDWIDTHS_FILE);

    System.out.println("Reading .mergedata file and writing completion "
        + "time, guard bandwidth, and rank to disk...");
    SimpleDateFormat dateTimeFormat =
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

    File[] candidates = new File(".").listFiles();
    if (candidates == null) {
      throw new IOException("Cannot list files in working directory.");
    }
    /* listFiles() guarantees no order; sort so output is reproducible. */
    Arrays.sort(candidates);

    BufferedWriter bw = new BufferedWriter(new FileWriter(OUTPUT_FILE));
    try {
      bw.write("bandwidth,rank,completiontime,guards,filesize\n");
      for (File mergedataFile : candidates) {
        String filename = mergedataFile.getName();
        if (!filename.endsWith(MERGEDATA_SUFFIX)
            || !mergedataFile.isFile()) {
          continue;
        }
        /* Expected name shape: <guards>80cbt-<filesize>.mergedata */
        int markerIndex = filename.indexOf(GUARDS_MARKER);
        String[] dashParts = filename.split("-");
        if (markerIndex < 0 || dashParts.length < 2) {
          System.err.println("Skipping file with unexpected name: "
              + filename);
          continue;
        }
        String guards = filename.substring(0, markerIndex);
        String filesize = dashParts[1].split("\\.")[0];
        mergeFile(mergedataFile, guards, filesize, bandwidthRanks,
            dateTimeFormat, bw);
      }
    } finally {
      bw.close();
    }
  }

  /**
   * Reads the bandwidth CSV and returns a map from
   * {@code fingerprint,datetime} to {@code bandwidth,rank}.
   *
   * <p>Rows with the same datetime must be contiguous; a change of
   * datetime closes the current group. The group that is still open when
   * the file ends is ranked as well.
   */
  private static SortedMap<String, String> readBandwidthRanks(
      String filename) throws IOException {
    SortedMap<String, String> bandwidthRanks =
        new TreeMap<String, String>();
    BufferedReader br = new BufferedReader(new FileReader(filename));
    try {
      /* The first line is always discarded (presumably the CSV column
       * header); header and "(N rows)" lines are also skipped below. */
      String line = br.readLine();
      String lastDateTime = null;
      List<String> currentRelays = new ArrayList<String>();
      while ((line = br.readLine()) != null) {
        if (line.startsWith("fingerprint") || line.startsWith("(")) {
          continue;
        }
        String[] parts = line.split(",");
        if (parts.length < 3) {
          if (line.trim().length() > 0) {
            System.err.println("Skipping malformed bandwidth line: "
                + line);
          }
          continue;
        }
        String dateTime = parts[1];
        if (lastDateTime != null && !dateTime.equals(lastDateTime)) {
          rankRelays(currentRelays, lastDateTime, bandwidthRanks);
          currentRelays.clear();
        }
        lastDateTime = dateTime;
        currentRelays.add(line);
      }
      /* Flush the final group; without this the last datetime in the
       * file would never make it into the map. */
      if (lastDateTime != null) {
        rankRelays(currentRelays, lastDateTime, bandwidthRanks);
      }
    } finally {
      br.close();
    }
    return bandwidthRanks;
  }

  /**
   * Sorts one datetime group by bandwidth and stores
   * {@code bandwidth,rank} for every relay under the key
   * {@code fingerprint,datetime}.
   */
  private static void rankRelays(List<String> relays, String dateTime,
      SortedMap<String, String> bandwidthRanks) {
    Collections.sort(relays, BY_BANDWIDTH);
    int count = relays.size();
    for (int i = 0; i < count; i++) {
      String[] relayParts = relays.get(i).split(",");
      /* A single-relay group would compute 0/0 = NaN; use rank 0. */
      double rank = count > 1
          ? (double) i / (double) (count - 1) : 0.0;
      /* Fixed locale keeps '.' as decimal separator so that the value
       * cannot introduce an extra comma into the CSV output. */
      bandwidthRanks.put(relayParts[0] + "," + dateTime,
          String.format(Locale.US, "%s,%.6f", relayParts[2], rank));
    }
  }

  /**
   * Writes one output row for every usable line of a .mergedata file.
   * The reader is closed even if parsing or writing fails.
   */
  private static void mergeFile(File mergedataFile, String guards,
      String filesize, SortedMap<String, String> bandwidthRanks,
      DateFormat dateTimeFormat, BufferedWriter bw) throws IOException {
    BufferedReader br = new BufferedReader(new FileReader(mergedataFile));
    try {
      String line;
      while ((line = br.readLine()) != null) {
        String row = toOutputRow(line, guards, filesize, bandwidthRanks,
            dateTimeFormat);
        if (row != null) {
          bw.write(row);
        }
      }
    } finally {
      br.close();
    }
  }

  /**
   * Converts one space-separated {@code KEY=value} measurement line into
   * an output row, or returns {@code null} if the line must be skipped:
   * the measurement timed out, lacks a path or timestamps, or no earlier
   * bandwidth entry exists for its first relay.
   */
  private static String toOutputRow(String line, String guards,
      String filesize, SortedMap<String, String> bandwidthRanks,
      DateFormat dateTimeFormat) {
    String path = null;
    long started = 0L, completed = 0L;
    boolean timedOut = false;
    for (String part : line.split(" ")) {
      int separator = part.indexOf('=');
      if (separator < 0) {
        /* Not a KEY=value token, e.g. the result of a blank line. */
        continue;
      }
      String key = part.substring(0, separator);
      String value = part.substring(separator + 1);
      if (key.equals("PATH")) {
        path = value;
      } else if (key.equals("STARTSEC")) {
        started += Long.parseLong(value) * 1000L;
      } else if (key.equals("STARTUSEC")) {
        started += Long.parseLong(value) / 1000L;
      } else if (key.equals("DATACOMPLETESEC")) {
        completed += Long.parseLong(value) * 1000L;
      } else if (key.equals("DATACOMPLETEUSEC")) {
        completed += Long.parseLong(value) / 1000L;
      } else if (key.equals("DIDTIMEOUT") && value.equals("1")) {
        /* Remember the timeout; a plain 'continue' here would only
         * advance to the next token and not skip the measurement. */
        timedOut = true;
      }
    }
    if (timedOut || path == null || started == 0L || completed == 0L) {
      return null;
    }
    String[] hops = path.split(",");
    if (hops.length == 0 || hops[0].length() < 2) {
      return null;
    }
    /* Drop the first character of the first hop (presumably a '$'
     * prefix) and lower-case to match the keys of bandwidthRanks. */
    String fingerprint = hops[0].substring(1).toLowerCase();
    String dateTime = dateTimeFormat.format(new Date(started));
    String guardKey = fingerprint + "," + dateTime;
    /* headMap is exclusive: only keys strictly smaller than guardKey. */
    SortedMap<String, String> earlier = bandwidthRanks.headMap(guardKey);
    if (earlier.isEmpty()) {
      return null;
    }
    String previousGuardKey = earlier.lastKey();
    if (!previousGuardKey.startsWith(fingerprint + ",")) {
      /* Closest smaller key belongs to a different relay. */
      return null;
    }
    long completionTime = completed - started;
    return bandwidthRanks.get(previousGuardKey) + "," + completionTime
        + "," + guards + "," + filesize + "\n";
  }
}