From 968def62d5872fb23279a35c2474db276ae455e7 Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Sat, 17 Jan 2015 17:53:03 +0100 Subject: [PATCH] Add hidserv-stats extrapolation code (#13192). --- task-13192/.gitignore | 7 + task-13192/README.md | 24 + task-13192/src/R/plot.R | 246 ++++++++ task-13192/src/java/ExtrapolateHidServStats.java | 722 +++++++++++++++++++++++ 4 files changed, 999 insertions(+) create mode 100644 task-13192/.gitignore create mode 100644 task-13192/README.md create mode 100644 task-13192/src/R/plot.R create mode 100644 task-13192/src/java/ExtrapolateHidServStats.java diff --git a/task-13192/.gitignore b/task-13192/.gitignore new file mode 100644 index 0000000..7e8bf3b --- /dev/null +++ b/task-13192/.gitignore @@ -0,0 +1,7 @@ +in/ +.classpath +.project +src/bash/ +src/bin/ +out/ + diff --git a/task-13192/README.md b/task-13192/README.md new file mode 100644 index 0000000..c6ba2c8 --- /dev/null +++ b/task-13192/README.md @@ -0,0 +1,24 @@ +Extrapolating network totals from hidden-service statistics +=========================================================== + +Fetch (and inflate, but not extract) tarballs and/or fetch single files +from CollecTor and store them in the following directories: + + in/collector/archive/relay-descriptors/extra-infos/ + in/collector/archive/relay-descriptors/consensuses/ + in/collector/recent/relay-descriptors/extra-infos/ + in/collector/recent/relay-descriptors/consensuses/ + +Fetch the latest bandwidth.csv file from Metrics and store it at the +following path: + + in/metrics/bandwidth.csv + +Add metrics-lib to the classpath and compile the classes in src/java/. + +Run Java class ExtrapolateHidServStats. + +Run the R script: + + R --slave -f src/R/plot.R + diff --git a/task-13192/src/R/plot.R b/task-13192/src/R/plot.R new file mode 100644 index 0000000..991928b --- /dev/null +++ b/task-13192/src/R/plot.R @@ -0,0 +1,246 @@ +# Load required libraries. 
+# Attach the plotting and reshaping packages.  library() stops with an
+# error right here if a package is not installed, whereas require() would
+# merely return FALSE and let the script fail later at the first call into
+# the missing package.
+library(ggplot2, warn.conflicts = FALSE, quietly = TRUE)
+library(scales, warn.conflicts = FALSE, quietly = TRUE)
+library(reshape, warn.conflicts = FALSE, quietly = TRUE)
+library(splines, warn.conflicts = FALSE, quietly = TRUE)
+library(Hmisc, warn.conflicts = FALSE, quietly = TRUE)
+
+# Avoid scientific notation.
+options(scipen = 15)
+
+# Read .csv file written by Java.  This script uses its columns stats_end,
+# hidserv_rend_relayed_cells, hidserv_dir_onions_seen, prob_rend_point and
+# frac_hsdesc.
+h <- read.csv("out/csv/hidserv-stats.csv", stringsAsFactors = FALSE)
+
+# Create directories for graphs.
+dir.create(file.path("out", "graphs", "report"), showWarnings = FALSE,
+  recursive = TRUE)
+dir.create(file.path("out", "graphs", "slides"), showWarnings = FALSE,
+  recursive = TRUE)
+
+# Cut off last two days, because stats might be incomplete for those.
+# Only rows whose end date lies before the day preceding the latest end
+# date are kept.  The dates are parsed once and reused for the comparison.
+stats_end_dates <- as.Date(h$stats_end)
+h <- h[stats_end_dates < max(stats_end_dates) - 1, ]
+
+# Graph the number of reported stats by day: one row per report, counted
+# per end date by geom_bar().
+h7 <- data.frame(date = as.Date(h$stats_end), reports = 1)
+ggplot(h7, aes(x = date)) +
+  geom_bar(colour = "lightgray", width = 0.7, binwidth = 1) +
+  scale_x_date("") +
+  scale_y_continuous("")
+# ggsave() is called without a plot argument, so it writes the most recent
+# plot; the same graph is stored once for the report and once for slides.
+ggsave("out/graphs/report/num-reported-stats.pdf", width = 10, height = 3,
+  dpi = 100)
+ggsave("out/graphs/slides/hidserv-12.png", width = 8, height = 3,
+  dpi = 100)
+
+# Graph distributions of reported values by day.  Cell counts are scaled
+# by 512 / (86400 * 1000 * 1000) and labelled as MB/s, i.e. presumably
+# 512 bytes per cell over a one-day statistics interval.
+h1 <- data.frame(date = as.Date(h$stats_end),
+  traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  services = h$hidserv_dir_onions_seen)
+h1 <- melt(h1, "date")
+# Replace the column names produced by melt() with readable facet labels.
+h1 <- data.frame(date = h1$date,
+  variable = ifelse(h1$variable == "traffic", "traffic in MB/s",
+    ".onion addresses"), value = h1$value)
+ggplot(h1, aes(x = date, y = value, group = date)) +
+  geom_boxplot() +
+  facet_grid(variable ~ ., scales = "free_y") +
+  scale_x_date("") +
+  scale_y_continuous("Statistics reported by single relays\n")
+ggsave("out/graphs/report/stats-by-day.pdf", width = 10, height = 5,
+  dpi = 100)
+
+# Graph distributions of calculated fractions by day.
+# One row per report with the relay's two calculated probabilities.
+# frac_hsdesc is divided by 3.0 before plotting; the Java code that writes
+# the .csv file notes that these fractions add up to 3.0 over all HSDirs.
+h2 <- data.frame(date = as.Date(h$stats_end),
+  prob_rend_point = h$prob_rend_point,
+  x_frac_hsdesc = h$frac_hsdesc / 3.0)
+h2 <- melt(h2, "date")
+# Replace the column names produced by melt() with readable facet labels.
+h2 <- data.frame(date = h2$date,
+  variable = ifelse(h2$variable == "prob_rend_point",
+    "selected as rendezvous point", "responsible for a descriptor"),
+  value = h2$value)
+ggplot(h2, aes(x = date, y = value, group = date)) +
+  geom_boxplot() +
+  facet_grid(variable ~ ., scales = "free_y") +
+  scale_x_date("") +
+  scale_y_continuous("Calculated probabilities\n", labels = percent)
+ggsave("out/graphs/report/probs-by-relay.pdf", width = 10, height = 5,
+  dpi = 100)
+
+# Return the points of the empirical CDF of a numeric vector as a data
+# frame with columns x (sorted values) and y (cumulative fraction).
+# seq_along() is used instead of 1:length(), which would count down from 1
+# to 0 for an empty vector and make data.frame() fail on unequal lengths.
+ecdf_points <- function(values) {
+  values <- sort(values)
+  data.frame(x = values, y = seq_along(values) / length(values))
+}
+
+# CDF of a Laplace distribution centered at zero with scale parameter b.
+laplace_cdf <- function(x, b) {
+  0.5 + 0.5 * sign(x) * (1 - exp(-abs(x) / b))
+}
+
+# Graph ECDF of cells reported by relays with rend point probability of 0.
+# The blue curve is the Laplace CDF with scale 2048 / 0.3 for comparison.
+h8 <- ecdf_points(h[h$prob_rend_point == 0, "hidserv_rend_relayed_cells"])
+laplace_cells <- function(x) {
+  laplace_cdf(x, 2048 / 0.3)
+}
+ggplot(h8, aes(x = x, y = y)) +
+  geom_line() +
+  stat_function(fun = laplace_cells, colour = "blue") +
+  scale_x_continuous("\nReported cells on rendezvous circuits") +
+  scale_y_continuous("Cumulative probability\n")
+ggsave("out/graphs/report/zero-prob-cells.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Graph ECDF of .onions reported by relays with HSDir probability of 0.
+# The blue curve is the Laplace CDF with scale 8 / 0.3 for comparison.
+h9 <- ecdf_points(h[h$frac_hsdesc == 0, "hidserv_dir_onions_seen"])
+laplace_onions <- function(x) {
+  laplace_cdf(x, 8 / 0.3)
+}
+ggplot(h9, aes(x = x, y = y)) +
+  geom_line() +
+  stat_function(fun = laplace_onions, colour = "blue") +
+  scale_x_continuous("\nReported .onion addresses") +
+  scale_y_continuous("Cumulative probability\n")
+ggsave("out/graphs/report/zero-prob-onions.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Graph correlation between reports and fractions per relay.
+# Build one scatter data set per statistic: x is the relay's calculated
+# probability, y the value it reported.  Where the probability is exactly
+# zero, y is set to NA.
+onion_points <- data.frame(
+  x = h$frac_hsdesc / 3.0,
+  y = ifelse(h$frac_hsdesc == 0, NA, h$hidserv_dir_onions_seen),
+  facet = ".onion addresses")
+traffic_points <- data.frame(
+  x = h$prob_rend_point,
+  y = ifelse(h$prob_rend_point == 0, NA,
+    h$hidserv_rend_relayed_cells * 512 / (86400 * 1000)),
+  facet = "traffic in kB/s")
+h3 <- rbind(onion_points, traffic_points)
+
+# Reported .onion addresses against probability, with a linear fit.
+is_onion_row <- h3$facet == ".onion addresses"
+ggplot(h3[is_onion_row, ], aes(x = x, y = y)) +
+  geom_point(alpha = 0.5) +
+  stat_smooth(method = "lm") +
+  scale_x_continuous(name = "\nProbability", labels = percent) +
+  scale_y_continuous(name = "Reported .onion addresses\n")
+ggsave("out/graphs/report/corr-probs-onions-by-relay.pdf", width = 5,
+  height = 3, dpi = 100)
+
+# Reported traffic against probability, with a linear fit.
+is_traffic_row <- h3$facet == "traffic in kB/s"
+ggplot(h3[is_traffic_row, ], aes(x = x, y = y)) +
+  geom_point(alpha = 0.5) +
+  stat_smooth(method = "lm") +
+  scale_x_continuous(name = "\nProbability", labels = percent) +
+  scale_y_continuous(name = "Reported traffic in kB/s\n")
+ggsave("out/graphs/report/corr-probs-cells-by-relay.pdf", width = 5,
+  height = 3, dpi = 100)
+
+# Graph correlation between reports and fractions per day.
+# One row per report and statistic.  A probability of exactly zero is
+# turned into NA so that na.omit() below drops the whole report.
+onions_per_report <- data.frame(
+  date = as.Date(h$stats_end),
+  prob = ifelse(h$frac_hsdesc == 0, NA, h$frac_hsdesc / 3.0),
+  reported = h$hidserv_dir_onions_seen,
+  facet = "published descriptor")
+traffic_per_report <- data.frame(
+  date = as.Date(h$stats_end),
+  prob = ifelse(h$prob_rend_point == 0, NA, h$prob_rend_point),
+  reported = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  facet = "traffic in MB/s")
+h5 <- na.omit(rbind(onions_per_report, traffic_per_report))
+
+# Sum probabilities and reported values per day and statistic.
+h5 <- aggregate(h5[c("prob", "reported")],
+  by = as.list(h5[c("date", "facet")]), FUN = sum)
+
+# Daily traffic totals against daily total probability; the dashed
+# vertical line marks a total probability of 1 %.
+is_traffic_row <- h5$facet == "traffic in MB/s"
+ggplot(h5[is_traffic_row, ], aes(x = prob, y = reported)) +
+  geom_point(alpha = 0.5) +
+  scale_x_continuous(name = "\nTotal probability", labels = percent) +
+  scale_y_continuous(name = "Total traffic in MB/s\n") +
+  stat_smooth(method = "lm") +
+  geom_vline(xintercept = 0.01, linetype = 2)
+ggsave("out/graphs/report/corr-probs-cells-by-day.pdf", width = 5,
+  height = 3, dpi = 100)
+
+# The same graph for the daily totals of reported .onion addresses.
+is_onion_row <- h5$facet == "published descriptor"
+ggplot(h5[is_onion_row, ], aes(x = prob, y = reported)) +
+  geom_point(alpha = 0.5) +
+  scale_x_continuous(name = "\nTotal probability", labels = percent) +
+  scale_y_continuous(name = "Total reported .onion addresses\n") +
+  stat_smooth(method = "lm") +
+  geom_vline(xintercept = 0.01, linetype = 2)
+ggsave("out/graphs/report/corr-probs-onions-by-day.pdf", width = 5,
+  height = 3, dpi = 100)
+
+# Graph extrapolated network totals.
+# Per-report inputs.  A reported value counts as zero where the matching
+# probability is exactly zero.
+reports <- data.frame(
+  date = as.Date(h$stats_end),
+  traffic = ifelse(h$prob_rend_point == 0, 0,
+    h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000)),
+  prob_rend_point = h$prob_rend_point,
+  onions = ifelse(h$frac_hsdesc == 0, 0, h$hidserv_dir_onions_seen),
+  prob_onion = h$frac_hsdesc * 4.0)
+
+# Daily sums of reported values and probabilities.
+summed_columns <- c("traffic", "prob_rend_point", "onions", "prob_onion")
+daily <- aggregate(reports[summed_columns],
+  by = list(date = reports$date), FUN = sum)
+
+# Extrapolate by dividing each daily sum by the summed probability.  Days
+# below the 0.01 threshold get a total of 0 and are dropped further down.
+totals <- data.frame(
+  date = daily$date,
+  traffic = ifelse(daily$prob_rend_point < 0.01, 0,
+    daily$traffic / daily$prob_rend_point),
+  onions = ifelse(daily$prob_onion / 12.0 < 0.01, 0,
+    daily$onions / daily$prob_onion))
+h6 <- melt(totals, id.vars = "date")
+h6 <- h6[h6$value > 0, ]
+
+# Append one dateless row with value 0 per statistic, presumably so that
+# the y axes include zero.
+zero_rows <- data.frame(date = NA, variable = c("traffic", "onions"),
+  value = 0)
+h6 <- rbind(h6, zero_rows)
+
+# Replace the column names produced by melt() with readable facet labels.
+facet_labels <- ifelse(h6$variable == "traffic", "total traffic in MB/s",
+  ".onion addresses")
+h6 <- data.frame(date = h6$date, variable = facet_labels, value = h6$value)
+ggplot(h6, aes(date, value)) +
+  facet_grid(variable ~ ., scales = "free_y") +
+  geom_point() +
+  stat_smooth() +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "Extrapolated network totals\n")
+ggsave("out/graphs/report/extrapolated-network-totals.pdf", width = 10,
+  height = 5, dpi = 100)
+
+# Graph extrapolated number of .onion addresses.
+is_onion_row <- h6$variable == ".onion addresses"
+h11 <- h6[is_onion_row, ]
+ggplot(h11, aes(x = date, y = value)) +
+  geom_point() +
+  stat_smooth() +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "")
+ggsave("out/graphs/slides/hidserv-13.png", width = 8, height = 3,
+  dpi = 100)
+
+# Graph extrapolated fraction of hidden-service traffic.
+# Read the bandwidth file and keep only rows with empty isexit and isguard
+# fields that are dated after 2014-12-20.
+b <- read.csv("in/metrics/bandwidth.csv", stringsAsFactors = FALSE)
+keep_bw_row <- b$isexit == "" & b$isguard == "" & b$date > "2014-12-20"
+b <- b[keep_bw_row, ]
+
+# Daily sums of reported traffic and rendezvous-point probability.
+reports <- data.frame(
+  date = as.Date(h$stats_end),
+  traffic = h$hidserv_rend_relayed_cells * 512 / (86400 * 1000 * 1000),
+  prob_rend_point = h$prob_rend_point)
+daily <- aggregate(reports[c("traffic", "prob_rend_point")],
+  by = list(date = reports$date), FUN = sum)
+
+# Extrapolated daily traffic; days with a summed probability below 0.01
+# get 0 and are removed right after melting.
+extrapolated <- data.frame(
+  date = daily$date,
+  traffic = ifelse(daily$prob_rend_point < 0.01, 0,
+    daily$traffic / daily$prob_rend_point))
+h10 <- melt(extrapolated, id.vars = "date")
+h10 <- h10[h10$value > 0, ]
+
+# Add read plus written bandwidth as a second variable, put both
+# variables side by side per date and keep only dates that have both.
+bw_rows <- data.frame(date = as.Date(b$date), variable = "bw",
+  value = b$bwread + b$bwwrite)
+h10 <- rbind(h10, bw_rows)
+h10 <- cast(h10, date ~ variable, value = "value")
+h10 <- na.omit(h10)
+
+# Ratio of extrapolated traffic (scaled back by 1000 * 1000) to bandwidth,
+# plus one dateless row with value 0, presumably so that the y axis
+# includes zero.
+h10 <- data.frame(date = h10$date,
+  value = h10$traffic * 1000 * 1000 / h10$bw)
+h10 <- rbind(h10, data.frame(date = NA, value = 0))
+ggplot(h10, aes(x = date, y = value)) +
+  geom_point() +
+  scale_x_date(name = "") +
+  scale_y_continuous(name = "", labels = percent) +
+  stat_smooth()
+ggsave("out/graphs/slides/hidserv-14.png", width = 8, height = 3,
+  dpi = 100)
+
+# Graph simulation results for cells on rendezvous circuits.
+# The percentile columns are shown as relative deviation from 1e10, the
+# number of cells generated by the Java simulation.
+s <- read.csv("out/csv/sim-cells.csv")
+total_cells <- 1e10
+ggplot(s, aes(x = frac, y = (p500 - total_cells) / total_cells,
+    ymin = (p025 - total_cells) / total_cells,
+    ymax = (p975 - total_cells) / total_cells)) +
+  geom_line() +
+  geom_ribbon(alpha = 0.2) +
+  scale_x_continuous("\nRendezvous points included in extrapolation",
+    labels = percent) +
+  scale_y_continuous("Deviation from network totals\n", labels = percent)
+ggsave("out/graphs/report/sim-cells.pdf", width = 5, height = 3,
+  dpi = 100)
+
+# Graph simulation results for .onion addresses.
+o <- read.csv("out/csv/sim-onions.csv") +ggplot(o, aes(x = frac, y = (p500 - 40000) / 40000, + ymin = (p025 - 40000) / 40000, ymax = (p975 - 40000) / 40000)) + +geom_line() + +geom_ribbon(alpha = 0.2) + +scale_x_continuous("\nDirectories included in extrapolation", + labels = percent) + +scale_y_continuous("Deviation from network totals\n", labels = percent) +ggsave("out/graphs/report/sim-onions.pdf", width = 5, height = 3, + dpi = 100) + diff --git a/task-13192/src/java/ExtrapolateHidServStats.java b/task-13192/src/java/ExtrapolateHidServStats.java new file mode 100644 index 0000000..100520d --- /dev/null +++ b/task-13192/src/java/ExtrapolateHidServStats.java @@ -0,0 +1,722 @@ +import java.io.BufferedWriter; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileWriter; +import java.math.BigInteger; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Scanner; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.torproject.descriptor.Descriptor; +import org.torproject.descriptor.DescriptorFile; +import org.torproject.descriptor.DescriptorReader; +import org.torproject.descriptor.DescriptorSourceFactory; +import org.torproject.descriptor.ExtraInfoDescriptor; +import org.torproject.descriptor.NetworkStatusEntry; +import org.torproject.descriptor.RelayNetworkStatusConsensus; + +public class ExtrapolateHidServStats { + + private static File archiveExtraInfosDirectory = + new File("in/collector/archive/relay-descriptors/extra-infos/"); + + private static File recentExtraInfosDirectory = + new File("in/collector/recent/relay-descriptors/extra-infos/"); + + private 
static File archiveConsensuses = + new File("in/collector/archive/relay-descriptors/consensuses/"); + + private static File recentConsensuses = + new File("in/collector/recent/relay-descriptors/consensuses/"); + + private static File hidservStatsCsvFile = + new File("out/csv/hidserv-stats.csv"); + + private static File simCellsCsvFile = + new File("out/csv/sim-cells.csv"); + + private static File simOnionsCsvFile = + new File("out/csv/sim-onions.csv"); + + public static void main(String[] args) throws Exception { + System.out.println("Extracting hidserv-* lines from extra-info " + + "descriptors..."); + SortedMap> hidServStats = + extractHidServStats(); + System.out.println("Extracting fractions from consensuses..."); + SortedMap> consensusFractions = + extractConsensusFractions(hidServStats.keySet()); + System.out.println("Extrapolating statistics..."); + extrapolateHidServStats(hidServStats, consensusFractions); + System.out.println("Simulating extrapolation of rendezvous cells..."); + simulateCells(); + System.out.println("Simulating extrapolation of .onions..."); + simulateOnions(); + System.out.println("Terminating."); + } + + private static final DateFormat DATE_TIME_FORMAT; + + static { + DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + DATE_TIME_FORMAT.setLenient(false); + DATE_TIME_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); + } + + private static class HidServStats implements Comparable { + + /* Hidden-service statistics end timestamp in milliseconds. */ + private long statsEndMillis; + + /* Statistics interval length in seconds. */ + private long statsIntervalSeconds; + + /* Number of relayed cells reported by the relay and adjusted by + * rounding to the nearest right side of a bin and subtracting half of + * the bin size. */ + private long rendRelayedCells; + + /* Number of .onions reported by the relay and adjusted by rounding to + * the nearest right side of a bin and subtracting half of the bin + * size. 
*/ + private long dirOnionsSeen; + + private HidServStats(long statsEndMillis, long statsIntervalSeconds, + long rendRelayedCells, long dirOnionsSeen) { + this.statsEndMillis = statsEndMillis; + this.statsIntervalSeconds = statsIntervalSeconds; + this.rendRelayedCells = rendRelayedCells; + this.dirOnionsSeen = dirOnionsSeen; + } + + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof HidServStats)) { + return false; + } + HidServStats other = (HidServStats) otherObject; + return this.statsEndMillis == other.statsEndMillis && + this.statsIntervalSeconds == other.statsIntervalSeconds && + this.rendRelayedCells == other.rendRelayedCells && + this.dirOnionsSeen == other.dirOnionsSeen; + } + + @Override + public int compareTo(HidServStats other) { + return this.statsEndMillis < other.statsEndMillis ? -1 : + this.statsEndMillis > other.statsEndMillis ? 1 : 0; + } + } + + /* Extract fingerprint and hidserv-* lines from extra-info descriptors + * located in in/{archive,recent}/relay-descriptors/extra-infos/. 
*/ + private static SortedMap> + extractHidServStats() { + SortedMap> extractedHidServStats = + new TreeMap>(); + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.addDirectory(archiveExtraInfosDirectory); + descriptorReader.addDirectory(recentExtraInfosDirectory); + Iterator descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (!(descriptor instanceof ExtraInfoDescriptor)) { + continue; + } + String fingerprint = + ((ExtraInfoDescriptor) descriptor).getFingerprint(); + Scanner scanner = new Scanner(new ByteArrayInputStream( + descriptor.getRawDescriptorBytes())); + Long statsEndMillis = null, statsIntervalSeconds = null, + rendRelayedCells = null, dirOnionsSeen = null; + try { + while (scanner.hasNext()) { + String line = scanner.nextLine(); + if (line.startsWith("hidserv-")) { + String[] parts = line.split(" "); + if (parts[0].equals("hidserv-stats-end")) { + if (parts.length != 5 || !parts[3].startsWith("(") || + !parts[4].equals("s)")) { + /* Will warn below, because statsEndMillis and + * statsIntervalSeconds are still null. */ + continue; + } + statsEndMillis = DATE_TIME_FORMAT.parse( + parts[1] + " " + parts[2]).getTime(); + statsIntervalSeconds = + Long.parseLong(parts[3].substring(1)); + } else if (parts[0].equals("hidserv-rend-relayed-cells")) { + if (parts.length != 5 || + !parts[4].startsWith("bin_size=")) { + /* Will warn below, because rendRelayedCells is still + * null. */ + continue; + } + rendRelayedCells = removeNoise(Long.parseLong(parts[1]), + Long.parseLong(parts[4].substring(9))); + } else if (parts[0].equals("hidserv-dir-onions-seen")) { + if (parts.length != 5 || + !parts[4].startsWith("bin_size=")) { + /* Will warn below, because dirOnionsSeen is still + * null. 
*/ + continue; + } + dirOnionsSeen = removeNoise(Long.parseLong(parts[1]), + Long.parseLong(parts[4].substring(9))); + } + } + } + } catch (ParseException e) { + e.printStackTrace(); + continue; + } catch (NumberFormatException e) { + e.printStackTrace(); + continue; + } + if (statsEndMillis == null && statsIntervalSeconds == null && + rendRelayedCells == null && dirOnionsSeen == null) { + continue; + } else if (statsEndMillis != null && statsIntervalSeconds != null + && rendRelayedCells != null && dirOnionsSeen != null) { + if (!extractedHidServStats.containsKey(fingerprint)) { + extractedHidServStats.put(fingerprint, + new TreeSet()); + } + extractedHidServStats.get(fingerprint).add(new HidServStats( + statsEndMillis, statsIntervalSeconds, rendRelayedCells, + dirOnionsSeen)); + } else { + System.err.println("Relay " + fingerprint + " published " + + "incomplete hidserv-stats. Ignoring."); + } + } + } + return extractedHidServStats; + } + + private static long removeNoise(long reportedNumber, long binSize) { + long roundedToNearestRightSideOfTheBin = + ((reportedNumber + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + return subtractedHalfOfBinSize; + } + + private static class ConsensusFraction + implements Comparable { + + /* Valid-after timestamp of the consensus in milliseconds. */ + private long validAfterMillis; + + /* Fresh-until timestamp of the consensus in milliseconds. */ + private long freshUntilMillis; + + /* Fraction of consensus weight in [0.0, 1.0] of this relay. */ + private double fractionConsensusWeight; + + /* Probability for being selected by clients as rendezvous point. */ + private double probabilityRendezvousPoint; + + /* Fraction of descriptor identifiers in [0.0, 1.0] that this relay + * has been responsible for. This is the "distance" from the + * fingerprint of the relay three HSDir positions earlier in the ring + * to the fingerprint of this relay. 
Fractions of all HSDirs in a + * consensus add up to 3.0, not 1.0. */ + private double fractionResponsibleDescriptors; + + private ConsensusFraction(long validAfterMillis, + long freshUntilMillis, + double fractionConsensusWeight, + double probabilityRendezvousPoint, + double fractionResponsibleDescriptors) { + this.validAfterMillis = validAfterMillis; + this.freshUntilMillis = freshUntilMillis; + this.fractionConsensusWeight = fractionConsensusWeight; + this.probabilityRendezvousPoint = probabilityRendezvousPoint; + this.fractionResponsibleDescriptors = + fractionResponsibleDescriptors; + } + + @Override + public boolean equals(Object otherObject) { + if (!(otherObject instanceof ConsensusFraction)) { + return false; + } + ConsensusFraction other = (ConsensusFraction) otherObject; + return this.validAfterMillis == other.validAfterMillis && + this.freshUntilMillis == other.freshUntilMillis && + this.fractionResponsibleDescriptors == + other.fractionResponsibleDescriptors && + this.fractionConsensusWeight == other.fractionConsensusWeight && + this.probabilityRendezvousPoint == + other.probabilityRendezvousPoint; + } + + @Override + public int compareTo(ConsensusFraction other) { + return this.validAfterMillis < other.validAfterMillis ? -1 : + this.validAfterMillis > other.validAfterMillis ? 1 : 0; + } + } + + /* Extract fractions that relays were responsible for from consensuses + * located in in/{archive,recent}/relay-descriptors/consensuses/. 
*/ + private static SortedMap> + extractConsensusFractions(Collection fingerprints) { + SortedMap> + extractedConsensusFractions = + new TreeMap>(); + DescriptorReader descriptorReader = + DescriptorSourceFactory.createDescriptorReader(); + descriptorReader.addDirectory(archiveConsensuses); + descriptorReader.addDirectory(recentConsensuses); + Iterator descriptorFiles = + descriptorReader.readDescriptors(); + while (descriptorFiles.hasNext()) { + DescriptorFile descriptorFile = descriptorFiles.next(); + for (Descriptor descriptor : descriptorFile.getDescriptors()) { + if (!(descriptor instanceof RelayNetworkStatusConsensus)) { + continue; + } + RelayNetworkStatusConsensus consensus = + (RelayNetworkStatusConsensus) descriptor; + SortedSet weightKeys = new TreeSet(Arrays.asList( + "Wmg,Wmm,Wme,Wmd".split(","))); + weightKeys.removeAll(consensus.getBandwidthWeights().keySet()); + if (!weightKeys.isEmpty()) { + System.err.println("Consensus with valid-after time " + + DATE_TIME_FORMAT.format(consensus.getValidAfterMillis()) + + " doesn't contain expected Wmx weights. 
Skipping."); + continue; + } + double wmg = ((double) consensus.getBandwidthWeights().get("Wmg")) + / 10000.0; + double wmm = ((double) consensus.getBandwidthWeights().get("Wmm")) + / 10000.0; + double wme = ((double) consensus.getBandwidthWeights().get("Wme")) + / 10000.0; + double wmd = ((double) consensus.getBandwidthWeights().get("Wmd")) + / 10000.0; + SortedSet hsDirs = new TreeSet( + Collections.reverseOrder()); + long totalConsensusWeight = 0L; + double totalWeightsRendezvousPoint = 0.0; + SortedMap weightsRendezvousPoint = + new TreeMap(); + for (Map.Entry e : + consensus.getStatusEntries().entrySet()) { + String fingerprint = e.getKey(); + NetworkStatusEntry statusEntry = e.getValue(); + SortedSet flags = statusEntry.getFlags(); + if (flags.contains("HSDir")) { + hsDirs.add(statusEntry.getFingerprint()); + } + totalConsensusWeight += statusEntry.getBandwidth(); + double weightRendezvousPoint = 0.0; + if (flags.contains("Fast")) { + weightRendezvousPoint = (double) statusEntry.getBandwidth(); + if (flags.contains("Guard") && flags.contains("Exit")) { + weightRendezvousPoint *= wmd; + } else if (flags.contains("Guard")) { + weightRendezvousPoint *= wmg; + } else if (flags.contains("Exit")) { + weightRendezvousPoint *= wme; + } else { + weightRendezvousPoint *= wmm; + } + } + weightsRendezvousPoint.put(fingerprint, weightRendezvousPoint); + totalWeightsRendezvousPoint += weightRendezvousPoint; + } + /* Add all HSDir fingerprints with leading "0" and "1" to + * simplify the logic to traverse the ring start. 
*/ + SortedSet hsDirsCopy = new TreeSet(hsDirs); + hsDirs.clear(); + for (String fingerprint : hsDirsCopy) { + hsDirs.add("0" + fingerprint); + hsDirs.add("1" + fingerprint); + } + final double RING_SIZE = new BigInteger( + "10000000000000000000000000000000000000000", + 16).doubleValue(); + for (String fingerprint : fingerprints) { + double probabilityRendezvousPoint = 0.0, + fractionResponsibleDescriptors = 0.0, + fractionConsensusWeight = 0.0; + NetworkStatusEntry statusEntry = + consensus.getStatusEntry(fingerprint); + if (statusEntry != null) { + if (hsDirs.contains("1" + fingerprint)) { + String startResponsible = fingerprint; + int positionsToGo = 3; + for (String hsDirFingerprint : + hsDirs.tailSet("1" + fingerprint)) { + startResponsible = hsDirFingerprint; + if (positionsToGo-- <= 0) { + break; + } + } + fractionResponsibleDescriptors = + new BigInteger("1" + fingerprint, 16).subtract( + new BigInteger(startResponsible, 16)).doubleValue() + / RING_SIZE; + } + fractionConsensusWeight = + ((double) statusEntry.getBandwidth()) + / ((double) totalConsensusWeight); + probabilityRendezvousPoint = + weightsRendezvousPoint.get(fingerprint) + / totalWeightsRendezvousPoint; + } + if (!extractedConsensusFractions.containsKey(fingerprint)) { + extractedConsensusFractions.put(fingerprint, + new TreeSet()); + } + extractedConsensusFractions.get(fingerprint).add( + new ConsensusFraction(consensus.getValidAfterMillis(), + consensus.getFreshUntilMillis(), fractionConsensusWeight, + probabilityRendezvousPoint, + fractionResponsibleDescriptors)); + } + } + } + return extractedConsensusFractions; + } + + private static void extrapolateHidServStats( + SortedMap> hidServStats, + SortedMap> + consensusFractions) throws Exception { + hidservStatsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter( + new FileWriter(hidservStatsCsvFile)); + bw.write("fingerprint,stats_start,stats_end," + + "hidserv_rend_relayed_cells,hidserv_dir_onions_seen," + + 
"prob_rend_point,frac_hsdesc\n"); + for (Map.Entry> e : + hidServStats.entrySet()) { + String fingerprint = e.getKey(); + if (!consensusFractions.containsKey(fingerprint)) { + System.err.println("We have hidserv-stats but no consensus " + + "fractions for " + fingerprint + ". Skipping."); + continue; + } + for (HidServStats stats : e.getValue()) { + long statsStartMillis = stats.statsEndMillis + - stats.statsIntervalSeconds * 1000L; + double sumProbabilityRendezvousPoint = 0.0, + sumResponsibleDescriptors = 0.0; + int statusEntries = 0; + for (ConsensusFraction frac : + consensusFractions.get(fingerprint)) { + if (statsStartMillis <= frac.validAfterMillis && + frac.validAfterMillis < stats.statsEndMillis) { + sumProbabilityRendezvousPoint += + frac.probabilityRendezvousPoint; + sumResponsibleDescriptors += + frac.fractionResponsibleDescriptors; + statusEntries++; + } + } + bw.write(String.format("%s,%s,%s,%d,%d,%.8f,%.8f%n", fingerprint, + DATE_TIME_FORMAT.format(statsStartMillis), + DATE_TIME_FORMAT.format(stats.statsEndMillis), + stats.rendRelayedCells, stats.dirOnionsSeen, + sumProbabilityRendezvousPoint / statusEntries, + sumResponsibleDescriptors / statusEntries)); + } + } + bw.close(); + } + + private static Random rnd = new Random(3); + + private static void simulateCells() throws Exception { + + /* Generate consensus weights following an exponential distribution + * with lambda = 1 for 3000 potential rendezvous points. */ + final int numberRendPoints = 3000; + double[] consensusWeights = new double[numberRendPoints]; + double totalConsensusWeight = 0.0; + for (int i = 0; i < numberRendPoints; i++) { + double consensusWeight = -Math.log(1.0 - rnd.nextDouble()); + consensusWeights[i] = consensusWeight; + totalConsensusWeight += consensusWeight; + } + + /* Compute probabilities for being selected as rendezvous point. 
*/ + double[] probRendPoint = new double[numberRendPoints]; + for (int i = 0; i < numberRendPoints; i++) { + probRendPoint[i] = consensusWeights[i] / totalConsensusWeight; + } + + /* Generate 10,000,000,000 (roughly 60 MiB/s) cells in chunks + * following an exponential distribution with lambda = 0.00001 and + * randomly assign them to a rendezvous point to report them later. */ + long cellsLeft = 10000000000L; + final double cellsLambda = 0.00001; + long[] observedCells = new long[numberRendPoints]; + while (cellsLeft > 0) { + long cells = (long) (-Math.log(1.0 - rnd.nextDouble()) + / cellsLambda); + double selectRendPoint = rnd.nextDouble(); + for (int i = 0; i < probRendPoint.length; i++) { + selectRendPoint -= probRendPoint[i]; + if (selectRendPoint <= 0.0) { + observedCells[i] += cells; + break; + } + } + cellsLeft -= cells; + } + + /* Obfuscate reports using binning and Laplace noise, and then attempt + * to remove noise again. */ + final long binSize = 1024L; + final double b = 2048.0 / 0.3; + long[] reportedCells = new long[numberRendPoints]; + long[] removedNoiseCells = new long[numberRendPoints]; + for (int i = 0; i < numberRendPoints; i++) { + long observed = observedCells[i]; + long afterBinning = ((observed + binSize - 1L) / binSize) * binSize; + double p = rnd.nextDouble(); + double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) * + Math.log(1.0 - 2.0 * Math.abs(p - 0.5)); + long reported = afterBinning + (long) laplaceNoise; + reportedCells[i] = reported; + long roundedToNearestRightSideOfTheBin = + ((reported + binSize / 2) / binSize) * binSize; + long subtractedHalfOfBinSize = + roundedToNearestRightSideOfTheBin - binSize / 2; + removedNoiseCells[i] = subtractedHalfOfBinSize; + } + + /* Perform 10,000 extrapolations from random fractions of reports by + * probability to be selected as rendezvous point. 
*/ + simCellsCsvFile.getParentFile().mkdirs(); + BufferedWriter bw = new BufferedWriter(new FileWriter( + simCellsCsvFile)); + bw.write("frac,p025,p500,p975\n"); + double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, + 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 }; + final int numberOfExtrapolations = 10000; + for (double fraction : fractions) { + List extrapolations = new ArrayList(); + for (int i = 0; i < numberOfExtrapolations; i++) { + SortedSet nonReportingRelays = new TreeSet(); + for (int j = 0; j < numberRendPoints; j++) { + nonReportingRelays.add(j); + } + List shuffledRelays = new ArrayList( + nonReportingRelays); + Collections.shuffle(shuffledRelays); + SortedSet reportingRelays = new TreeSet(); + for (int j = 0; j < (int) ((double) numberRendPoints * fraction); + j++) { + reportingRelays.add(shuffledRelays.get(j)); + nonReportingRelays.remove(shuffledRelays.get(j)); + } + double reportingProbability; + long totalReports; + do { + reportingProbability = 0.0; + totalReports = 0L; + for (int reportingRelay : reportingRelays) { + reportingProbability += probRendPoint[reportingRelay]; + totalReports += removedNoiseCells[reportingRelay]; + } + if (reportingProbability < fraction - 0.001) { + int addRelay = new ArrayList(nonReportingRelays).get( + rnd.nextInt(nonReportingRelays.size())); + nonReportingRelays.remove(addRelay); + reportingRelays.add(addRelay); + } else if (reportingProbability > fraction + 0.001) { + int removeRelay = new ArrayList(reportingRelays).get( + rnd.nextInt(reportingRelays.size())); + reportingRelays.remove(removeRelay); + nonReportingRelays.add(removeRelay); + } + } while (reportingProbability < fraction - 0.001 || + reportingProbability > fraction + 0.001); + extrapolations.add((long) ((double) totalReports + / reportingProbability)); + } + Collections.sort(extrapolations); + long p025 = extrapolations.get((extrapolations.size() * 25) / 1000), + p500 = extrapolations.get((extrapolations.size() * 500) / 1000), + 
p975 = extrapolations.get((extrapolations.size() * 975) / 1000);
+      bw.write(String.format("%.2f,%d,%d,%d%n", fraction, p025, p500,
+          p975));
+    }
+    bw.close();
+  }
+
+  /**
+   * Simulates extrapolating the network-wide number of .onion addresses
+   * from obfuscated per-HSDir reports.  For each reporting fraction, the
+   * 2.5th, 50th and 97.5th percentiles of the extrapolated totals are
+   * written as one CSV line to simOnionsCsvFile.
+   *
+   * @throws Exception e.g., if the CSV file cannot be written.
+   */
+  private static void simulateOnions() throws Exception {
+
+    /* Every .onion has this many descriptor IDs ("replicas"), and every
+     * descriptor is stored on this many consecutive HSDirs.  Both values
+     * are reused below when computing and extrapolating fractions. */
+    final int replicas = 4;
+    final int storeOnDirs = 3;
+
+    /* Generate 3000 HSDirs with "fingerprints" between 0.0 and 1.0. */
+    final int numberHsDirs = 3000;
+    SortedSet<Double> hsDirFingerprints = new TreeSet<Double>();
+    for (int i = 0; i < numberHsDirs; i++) {
+      hsDirFingerprints.add(rnd.nextDouble());
+    }
+
+    /* Compute fractions of observed descriptor space.  The ring is kept
+     * in descending order and additionally contains every fingerprint
+     * shifted by -1.0, so that walking backwards from a fingerprint wraps
+     * around the lower end of the interval. */
+    SortedSet<Double> ring =
+        new TreeSet<Double>(Collections.<Double>reverseOrder());
+    for (double fingerprint : hsDirFingerprints) {
+      ring.add(fingerprint);
+      ring.add(fingerprint - 1.0);
+    }
+    SortedMap<Double, Double> hsDirFractions =
+        new TreeMap<Double, Double>();
+    for (double fingerprint : hsDirFingerprints) {
+      double start = fingerprint;
+      int positionsToGo = storeOnDirs;
+      /* The tail set starts with the fingerprint itself, so the loop
+       * ends on the fingerprint storeOnDirs positions before it. */
+      for (double prev : ring.tailSet(fingerprint)) {
+        start = prev;
+        if (positionsToGo-- <= 0) {
+          break;
+        }
+      }
+      hsDirFractions.put(fingerprint, fingerprint - start);
+    }
+
+    /* Generate 40000 .onions with 4 HSDesc IDs each and store every
+     * descriptor on the next 3 HSDirs following a random position. */
+    final int numberOnions = 40000;
+    SortedMap<Double, SortedSet<Integer>> storedDescs =
+        new TreeMap<Double, SortedSet<Integer>>();
+    for (double fingerprint : hsDirFingerprints) {
+      storedDescs.put(fingerprint, new TreeSet<Integer>());
+    }
+    for (int i = 0; i < numberOnions; i++) {
+      for (int j = 0; j < replicas; j++) {
+        int leftToStore = storeOnDirs;
+        for (double fingerprint :
+            hsDirFingerprints.tailSet(rnd.nextDouble())) {
+          storedDescs.get(fingerprint).add(i);
+          if (--leftToStore <= 0) {
+            break;
+          }
+        }
+        /* Wrap around to the smallest fingerprints if the end of the
+         * interval was reached before all copies were stored. */
+        if (leftToStore > 0) {
+          for (double fingerprint : hsDirFingerprints) {
+            storedDescs.get(fingerprint).add(i);
+            if (--leftToStore <= 0) {
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    /* Obfuscate reports using binning and Laplace noise, and then attempt
+     * to remove noise again. */
+    final long binSize = 8L;
+    final double b = 8.0 / 0.3;
+    SortedMap<Double, Long> removedNoiseOnions =
+        new TreeMap<Double, Long>();
+    for (Map.Entry<Double, SortedSet<Integer>> e :
+        storedDescs.entrySet()) {
+      double fingerprint = e.getKey();
+      long observed = (long) e.getValue().size();
+      long afterBinning = ((observed + binSize - 1L) / binSize) * binSize;
+      double p = rnd.nextDouble();
+      double laplaceNoise = -b * (p > 0.5 ? 1.0 : -1.0) *
+          Math.log(1.0 - 2.0 * Math.abs(p - 0.5));
+      long reported = afterBinning + (long) laplaceNoise;
+      /* Round to the nearest right side of a bin.  Java's integer
+       * division truncates towards zero, which would round negative
+       * reported values in the wrong direction, so turn the quotient
+       * into a floor division first. */
+      long shifted = reported + binSize / 2;
+      long bins = shifted / binSize;
+      if (shifted % binSize < 0) {
+        bins--;
+      }
+      long roundedToNearestRightSideOfTheBin = bins * binSize;
+      long subtractedHalfOfBinSize =
+          roundedToNearestRightSideOfTheBin - binSize / 2;
+      removedNoiseOnions.put(fingerprint, subtractedHalfOfBinSize);
+    }
+
+    /* Perform 10,000 extrapolations from random sets of reporting HSDirs
+     * whose share of the observed descriptor space is close to the given
+     * fraction. */
+    simOnionsCsvFile.getParentFile().mkdirs();
+    BufferedWriter bw = new BufferedWriter(new FileWriter(
+        simOnionsCsvFile));
+    try {
+      bw.write("frac,p025,p500,p975\n");
+      double[] fractions = new double[] { 0.01, 0.02, 0.03, 0.04, 0.05,
+          0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99 };
+      final int numberOfExtrapolations = 10000;
+      final double tolerance = 0.001;
+      for (double fraction : fractions) {
+        List<Long> extrapolationsTwo = new ArrayList<Long>();
+        for (int i = 0; i < numberOfExtrapolations; i++) {
+
+          /* Start with a random selection containing the requested
+           * fraction of all HSDirs by count.  Note that shuffle() uses
+           * its own default source of randomness, not rnd. */
+          SortedSet<Double> nonReportingRelays =
+              new TreeSet<Double>(hsDirFractions.keySet());
+          List<Double> shuffledRelays = new ArrayList<Double>(
+              nonReportingRelays);
+          Collections.shuffle(shuffledRelays);
+          SortedSet<Double> reportingRelays = new TreeSet<Double>();
+          for (int j = 0; j < (int) ((double) hsDirFractions.size()
+              * fraction); j++) {
+            reportingRelays.add(shuffledRelays.get(j));
+            nonReportingRelays.remove(shuffledRelays.get(j));
+          }
+
+          /* Add or remove random HSDirs until the normalized share of
+           * descriptor space is within the tolerance around fraction.
+           * Sums are recomputed at the top of each iteration, so they
+           * match the final selection when the loop ends. */
+          double reportingProbability;
+          long totalReports;
+          do {
+            reportingProbability = 0.0;
+            totalReports = 0L;
+            for (double reportingRelay : reportingRelays) {
+              reportingProbability += hsDirFractions.get(reportingRelay)
+                  / storeOnDirs;
+              totalReports += removedNoiseOnions.get(reportingRelay);
+            }
+            if (reportingProbability < fraction - tolerance) {
+              double addRelay =
+                  new ArrayList<Double>(nonReportingRelays).get(
+                  rnd.nextInt(nonReportingRelays.size()));
+              nonReportingRelays.remove(addRelay);
+              reportingRelays.add(addRelay);
+            } else if (reportingProbability > fraction + tolerance) {
+              double removeRelay =
+                  new ArrayList<Double>(reportingRelays).get(
+                  rnd.nextInt(reportingRelays.size()));
+              reportingRelays.remove(removeRelay);
+              nonReportingRelays.add(removeRelay);
+            }
+          } while (reportingProbability < fraction - tolerance ||
+              reportingProbability > fraction + tolerance);
+
+          /* Each .onion is counted once per replica, so divide the sum
+           * of reports by the observed fractions times replicas. */
+          double totalFraction = 0.0;
+          for (double fingerprint : reportingRelays) {
+            totalFraction += hsDirFractions.get(fingerprint) * replicas;
+          }
+          extrapolationsTwo.add((long) ((double) totalReports
+              / totalFraction));
+        }
+        Collections.sort(extrapolationsTwo);
+        long pTwo025 = extrapolationsTwo.get(
+            (extrapolationsTwo.size() * 25) / 1000),
+            pTwo500 = extrapolationsTwo.get(
+            (extrapolationsTwo.size() * 500) / 1000),
+            pTwo975 = extrapolationsTwo.get(
+            (extrapolationsTwo.size() * 975) / 1000);
+        bw.write(String.format("%.2f,%d,%d,%d%n", fraction, pTwo025,
+            pTwo500, pTwo975));
+      }
+    } finally {
+      /* Close the file even if an extrapolation or write fails. */
+      bw.close();
+    }
+  }
+}
+
-- 
2.11.4.GIT