task-2394/bandwidth-comparison.R

   1 options(warn = -1)
   2 suppressPackageStartupMessages(library("ggplot2"))
   3
   4 b <- read.csv("bandwidth-comparison.csv", stringsAsFactors = FALSE)
   5
   6 # Plot ECDF to compare categories
   7 cdf_relays_category <- function(data, category) {
   8   d <- data[data$category == category & data$descriptorbandwidth > 0, ]
   9   d <- sort(d$consensusbandwidth * 1000 / d$descriptorbandwidth)
  10   d <- data.frame(x = d, y = (1:length(d)) / length(d),
  11     category = category)
  12   d
  13 }
  14 relays_category <- rbind(
  15   cdf_relays_category(b, "Guard & Exit (default policy)"),
  16   cdf_relays_category(b, "Exit (default policy)"),
  17   cdf_relays_category(b, "Guard & Exit (non-default policy)"),
  18   cdf_relays_category(b, "Exit (non-default policy)"),
  19   cdf_relays_category(b, "Guard"),
  20   cdf_relays_category(b, "Middle"))
  21 ggplot(relays_category, aes(x = x, y = y, colour = category)) +
  22 geom_line() +
  23 scale_x_log10("\nRatio of measured by self-reported bandwidth",
  24   limits = c(0.1, 10), breaks = c(0.1, 0.2, 0.5, 1, 2, 5, 10),
  25   labels = c("0.1", "0.2", "0.5", "1", "2", "5", "10")) +
  26 scale_y_continuous("Fraction of relays\n", limits = c(0, 1),
  27   formatter = "percent") +
  28 scale_colour_hue("") +
  29 geom_vline(xintercept = 1, legend = FALSE, linetype = "dashed") +
  30 opts(title = "Ratio between measured and self-reported relay bandwidth",
  31   legend.position = "top")
  32 ggsave(filename = "bandwidth-comparison-relays.png",
  33   width = 8, height = 5, dpi = 150)
  34
  35 # Plot ECDFs to compare consensus to votes
  36 cdf_relays_category_votes <- function(data, category) {
  37   d <- data[data$category == category & data$descriptorbandwidth > 0, ]
  38   consensus <- sort(d$consensusbandwidth * 1000 / d$descriptorbandwidth)
  39   ides <- sort(d$idesbandwidth * 1000 / d$descriptorbandwidth)
  40   urras <- sort(d$urrasbandwidth * 1000 / d$descriptorbandwidth)
  41   moria1 <- sort(d$moria1bandwidth * 1000 / d$descriptorbandwidth)
  42   gabelmoo <- sort(d$gabelmoobandwidth * 1000 / d$descriptorbandwidth)
  43   maatuska <- sort(d$maatuskabandwidth * 1000 / d$descriptorbandwidth)
  44   d <- data.frame(x = consensus,
  45                y = (1:length(consensus)) / length(consensus),
  46                source = "consensus",
  47                category = category)
  48   if (length(urras) > 0) {
  49     d <- rbind(d, data.frame(x = urras,
  50                y = (1:length(urras)) / length(urras),
  51                source = "urras",
  52                category = category))
  53   }
  54   if (length(ides) > 0) {
  55     d <- rbind(d, data.frame(x = ides,
  56                y = (1:length(ides)) / length(ides),
  57                source = "ides",
  58                category = category))
  59   }
  60   if (length(moria1) > 0) {
  61     d <- rbind(d, data.frame(x = moria1,
  62                y = (1:length(moria1)) / length(moria1),
  63                source = "moria1",
  64                category = category))
  65   }
  66   if (length(gabelmoo) > 0) {
  67     d <- rbind(d, data.frame(x = gabelmoo,
  68                y = (1:length(gabelmoo)) / length(gabelmoo),
  69                source = "gabelmoo",
  70                category = category))
  71   }
  72   if (length(maatuska) > 0) {
  73     d <- rbind(d, data.frame(x = maatuska,
  74                y = (1:length(maatuska)) / length(maatuska),
  75                source = "maatuska",
  76                category = category))
  77   }
  78   d
  79 }
  80 relays_category_votes <- rbind(
  81   cdf_relays_category_votes(b, "Guard & Exit (default policy)"),
  82   cdf_relays_category_votes(b, "Exit (default policy)"),
  83   cdf_relays_category_votes(b, "Guard & Exit (non-default policy)"),
  84   cdf_relays_category_votes(b, "Exit (non-default policy)"),
  85   cdf_relays_category_votes(b, "Guard"),
  86   cdf_relays_category_votes(b, "Middle"))
  87 ggplot(relays_category_votes, aes(x = x, y = y, colour = source)) +
  88 geom_line() +
  89 facet_wrap(~ category, ncol = 3) +
  90 scale_x_log10("\nRatio of measured by self-reported bandwidth",
  91   limits = c(0.1, 10), breaks = c(0.1, 1, 10),
  92   labels = c("0.1", "1", "10")) +
  93 scale_y_continuous("Fraction of relays\n", limits = c(0, 1),
  94   formatter = "percent") +
  95 scale_colour_manual("", c("consensus" = "black",
  96   "urras" = alpha("purple", 0.5), "ides" = alpha("red", 0.5),
  97   "moria1" = alpha("green", 0.5), "gabelmoo" = alpha("blue", 0.5),
  98   "maatuska" = alpha("orange", 0.5))) +
  99 geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") +
 100 opts(title = paste("Measured vs. self-reported bandwidth ratios in",
 101   "consensus and votes\n"), legend.position = "right")
 102 ggsave(filename = "bandwidth-comparison-relays-votes.png",
 103   width = 8, height = 5, dpi = 150)
 104
 105 # Plot _weighted_ ECDFs
 106 wecdf <- function(data, source, category) {
 107   data <- data[with(data, order(ratio)), ]
 108   sum_measured <- sum(data$measured, na.rm = TRUE)
 109   cur_measured <- data$measured[1]
 110   res <- data.frame(x = data$ratio[1],
 111                   y = cur_measured / sum_measured,
 112                source = source, category = category)
 113   for (i in 2:length(data$ratio)) {
 114     cur_measured <- cur_measured + data$measured[i]
 115     res <- rbind(res, data.frame(x = data$ratio[i],
 116         y = cur_measured / sum_measured,
 117                source = source, category = category))
 118   }
 119   res
 120 }
 121 cdf_measured_category_votes <- function(data, category) {
 122   d <- data[data$category == category & data$descriptorbandwidth > 0, ]
 123   d <- rbind(
 124     wecdf(data.frame(
 125       ratio = d$consensusbandwidth * 1000 / d$descriptorbandwidth,
 126       measured = d$consensusbandwidth), "consensus", category),
 127     wecdf(data.frame(
 128       ratio = d$urrasbandwidth * 1000 / d$descriptorbandwidth,
 129       measured = d$urrasbandwidth), "urras", category),
 130     wecdf(data.frame(
 131       ratio = d$idesbandwidth * 1000 / d$descriptorbandwidth,
 132       measured = d$idesbandwidth), "ides", category),
 133     wecdf(data.frame(
 134       ratio = d$moria1bandwidth * 1000 / d$descriptorbandwidth,
 135       measured = d$moria1bandwidth), "moria1", category),
 136     wecdf(data.frame(
 137       ratio = d$gabelmoobandwidth * 1000 / d$descriptorbandwidth,
 138       measured = d$gabelmoobandwidth), "gabelmoo", category),
 139     wecdf(data.frame(
 140       ratio = d$maatuskabandwidth * 1000 / d$descriptorbandwidth,
 141       measured = d$maatuskabandwidth), "maatuska", category))
 142   d
 143 }
 144 measured_category_votes <- rbind(
 145   cdf_measured_category_votes(b, "Guard & Exit (default policy)"),
 146   cdf_measured_category_votes(b, "Exit (default policy)"),
 147   cdf_measured_category_votes(b, "Guard & Exit (non-default policy)"),
 148   cdf_measured_category_votes(b, "Exit (non-default policy)"),
 149   cdf_measured_category_votes(b, "Guard"),
 150   cdf_measured_category_votes(b, "Middle"))
 151 ggplot(measured_category_votes, aes(x = x, y = y, colour = source)) +
 152 geom_line() +
 153 facet_wrap(~ category, ncol = 3) +
 154 scale_x_log10("\nRatio of measured by self-reported bandwidth",
 155   limits = c(0.1, 10), breaks = c(0.1, 1, 10),
 156   labels = c("0.1", "1", "10")) +
 157 scale_y_continuous("Fraction of measured bandwidth\n", limits = c(0, 1),
 158   formatter = "percent") +
 159 scale_colour_manual("", c("consensus" = "black",
 160   "urras" = alpha("purple", 0.5), "ides" = alpha("red", 0.5),
 161   "moria1" = alpha("green", 0.5), "gabelmoo" = alpha("blue", 0.5),
 162   "maatuska" = alpha("orange", 0.5))) +
 163 geom_vline(xintercept = 1, legend = FALSE, linetype = "dotted") +
 164 opts(title = paste("Measured vs. self-reported bandwidth ratios in",
 165   "consensus and votes\n"), legend.position = "right")
 166 ggsave(filename = "bandwidth-comparison-measured-votes.png",
 167   width = 8, height = 5, dpi = 150)
 168 write.csv(measured_category_votes, "measured_category_votes-temp.csv",
 169   quote = FALSE, row.names = FALSE)
 170