Update criteria for partial/full IPv6 support.
[tor-metrics-tasks.git] / task-2394 / bandwidth-comparison.R
blob56095eff72c1a58079242c59d092b7b6a9c75f65
# Suppress all warnings for the remainder of the script.
options(warn = -1)
suppressPackageStartupMessages(library("ggplot2"))

# Input table. The code below reads the columns category,
# descriptorbandwidth, consensusbandwidth and one <authority>bandwidth
# column per voting authority (urras, ides, moria1, gabelmoo, maatuska).
b <- read.csv("bandwidth-comparison.csv", stringsAsFactors = FALSE)
# Plot ECDF to compare categories

# Build the empirical CDF of the consensus-to-descriptor bandwidth ratio for
# the relays of one category.
#
# data:     data frame with the columns category, consensusbandwidth and
#           descriptorbandwidth.
# category: value of data$category selecting the relays to include.
#
# Returns a data frame with the columns x (ratio, ascending), y (cumulative
# fraction of relays, i / n) and category. The result has zero rows when no
# relay matches.
cdf_relays_category <- function(data, category) {
  d <- data[data$category == category & data$descriptorbandwidth > 0, ]
  # sort() drops NA ratios, so relays without a usable value are excluded.
  ratio <- sort(d$consensusbandwidth * 1000 / d$descriptorbandwidth)
  n <- length(ratio)
  # seq_len() and rep() keep every column at length n, including n == 0,
  # where 1:length(ratio) would have produced c(1, 0) and made data.frame()
  # fail on mismatched row counts.
  data.frame(x = ratio, y = seq_len(n) / n, category = rep(category, n))
}
# Stack the per-category ECDFs into one long data frame; each category
# becomes one coloured curve in the plot below.
relays_category <- rbind(
  cdf_relays_category(b, "Guard & Exit (default policy)"),
  cdf_relays_category(b, "Exit (default policy)"),
  cdf_relays_category(b, "Guard & Exit (non-default policy)"),
  cdf_relays_category(b, "Exit (non-default policy)"),
  cdf_relays_category(b, "Guard"),
  cdf_relays_category(b, "Middle"))

# The plot uses the current ggplot2 API: `labels =` replaces the removed
# `formatter =` argument, ggtitle() + theme() replace the removed opts(),
# and `show.legend` replaces the removed `legend` layer argument.
ggplot(relays_category, aes(x = x, y = y, colour = category)) +
  geom_line() +
  scale_x_log10("\nRatio of measured by self-reported bandwidth",
    limits = c(0.1, 10), breaks = c(0.1, 0.2, 0.5, 1, 2, 5, 10),
    labels = c("0.1", "0.2", "0.5", "1", "2", "5", "10")) +
  scale_y_continuous("Fraction of relays\n", limits = c(0, 1),
    labels = function(x) paste0(x * 100, "%")) +
  scale_colour_hue("") +
  # Dashed reference line at ratio 1 (measured == self-reported).
  geom_vline(xintercept = 1, linetype = "dashed", show.legend = FALSE) +
  ggtitle("Ratio between measured and self-reported relay bandwidth") +
  theme(legend.position = "top")
# ggsave() without `plot =` writes the most recently created plot.
ggsave(filename = "bandwidth-comparison-relays.png",
  width = 8, height = 5, dpi = 150)
# Plot ECDFs to compare consensus to votes

# Build one empirical CDF per bandwidth source (the consensus plus each
# voting authority) of the source-to-descriptor bandwidth ratio, for the
# relays of one category.
#
# data:     data frame with the columns category, descriptorbandwidth,
#           consensusbandwidth, urrasbandwidth, idesbandwidth,
#           moria1bandwidth, gabelmoobandwidth and maatuskabandwidth.
# category: value of data$category selecting the relays to include.
#
# Returns a data frame with the columns x (ratio, ascending within each
# source), y (cumulative fraction of relays, i / n), source and category.
# Sources without any usable value contribute no rows.
cdf_relays_category_votes <- function(data, category) {
  d <- data[data$category == category & data$descriptorbandwidth > 0, ]
  # Source label -> column holding that source's bandwidth value, in the
  # order the rows are emitted.
  columns <- c(
    consensus = "consensusbandwidth",
    urras = "urrasbandwidth",
    ides = "idesbandwidth",
    moria1 = "moria1bandwidth",
    gabelmoo = "gabelmoobandwidth",
    maatuska = "maatuskabandwidth")
  ecdf_for_source <- function(source) {
    # sort() drops NA ratios, so relays lacking a value from this source
    # are left out of its ECDF.
    ratio <- sort(d[[columns[[source]]]] * 1000 / d$descriptorbandwidth)
    n <- length(ratio)
    # seq_len() and rep() keep all columns at length n, so an empty source
    # (including the consensus) yields a zero-row frame rather than an
    # error.
    data.frame(x = ratio, y = seq_len(n) / n,
      source = rep(source, n), category = rep(category, n))
  }
  # rbind() ignores the zero-row pieces.
  do.call(rbind, lapply(names(columns), ecdf_for_source))
}
# Stack the per-category, per-source ECDFs into one long data frame; the
# plot below draws one facet per category and one curve per source.
relays_category_votes <- rbind(
  cdf_relays_category_votes(b, "Guard & Exit (default policy)"),
  cdf_relays_category_votes(b, "Exit (default policy)"),
  cdf_relays_category_votes(b, "Guard & Exit (non-default policy)"),
  cdf_relays_category_votes(b, "Exit (non-default policy)"),
  cdf_relays_category_votes(b, "Guard"),
  cdf_relays_category_votes(b, "Middle"))

# The plot uses the current ggplot2 API: `labels =` replaces the removed
# `formatter =` argument, ggtitle() + theme() replace the removed opts(),
# `show.legend` replaces the removed `legend` layer argument, and the
# colour mapping is passed as `values =`.
ggplot(relays_category_votes, aes(x = x, y = y, colour = source)) +
  geom_line() +
  facet_wrap(~ category, ncol = 3) +
  scale_x_log10("\nRatio of measured by self-reported bandwidth",
    limits = c(0.1, 10), breaks = c(0.1, 1, 10),
    labels = c("0.1", "1", "10")) +
  scale_y_continuous("Fraction of relays\n", limits = c(0, 1),
    labels = function(x) paste0(x * 100, "%")) +
  # Consensus in solid black, each authority's vote semi-transparent.
  scale_colour_manual("", values = c("consensus" = "black",
    "urras" = alpha("purple", 0.5), "ides" = alpha("red", 0.5),
    "moria1" = alpha("green", 0.5), "gabelmoo" = alpha("blue", 0.5),
    "maatuska" = alpha("orange", 0.5))) +
  # Dotted reference line at ratio 1 (measured == self-reported).
  geom_vline(xintercept = 1, linetype = "dotted", show.legend = FALSE) +
  ggtitle(paste("Measured vs. self-reported bandwidth ratios in",
    "consensus and votes\n")) +
  theme(legend.position = "right")
# ggsave() without `plot =` writes the most recently created plot.
ggsave(filename = "bandwidth-comparison-relays-votes.png",
  width = 8, height = 5, dpi = 150)
# Plot _weighted_ ECDFs

# Build a weighted empirical CDF: rows are ordered by ratio and each row
# contributes its `measured` value, not a constant 1 / n, to the
# cumulative fraction.
#
# data:     data frame with the numeric columns ratio and measured.
# source:   label copied into the source column of every output row.
# category: label copied into the category column of every output row.
#
# Returns a data frame with one row per input row and the columns x (ratio,
# ascending), y (running sum of measured divided by the NA-free total of
# measured), source and category. Zero input rows give zero output rows.
wecdf <- function(data, source, category) {
  data <- data[order(data$ratio), ]
  n <- nrow(data)
  # order() places NA ratios last, so missing values only affect the tail
  # of the running sum; from the first NA in `measured` onwards y is NA.
  # cumsum() replaces the former 2:length() loop, which counted backwards
  # for inputs of 0 or 1 rows and grew the result with rbind() per row.
  data.frame(x = data$ratio,
    y = cumsum(data$measured) / sum(data$measured, na.rm = TRUE),
    source = rep(source, n), category = rep(category, n))
}
# Build the weighted ECDFs (see wecdf()) of the source-to-descriptor
# bandwidth ratio for the relays of one category, one per bandwidth source
# (the consensus plus each voting authority). Each relay is weighted by the
# bandwidth value that source assigned to it.
#
# data:     data frame with the columns category, descriptorbandwidth,
#           consensusbandwidth, urrasbandwidth, idesbandwidth,
#           moria1bandwidth, gabelmoobandwidth and maatuskabandwidth.
# category: value of data$category selecting the relays to include.
#
# Returns the row-bound wecdf() results in the order consensus, urras,
# ides, moria1, gabelmoo, maatuska.
cdf_measured_category_votes <- function(data, category) {
  d <- data[data$category == category & data$descriptorbandwidth > 0, ]
  # Source label -> column holding that source's bandwidth value.
  columns <- c(
    consensus = "consensusbandwidth",
    urras = "urrasbandwidth",
    ides = "idesbandwidth",
    moria1 = "moria1bandwidth",
    gabelmoo = "gabelmoobandwidth",
    maatuska = "maatuskabandwidth")
  per_source <- lapply(names(columns), function(source) {
    measured <- d[[columns[[source]]]]
    wecdf(data.frame(
      ratio = measured * 1000 / d$descriptorbandwidth,
      measured = measured), source, category)
  })
  do.call(rbind, per_source)
}
# Stack the per-category, per-source weighted ECDFs into one long data
# frame; the plot below draws one facet per category and one curve per
# source.
measured_category_votes <- rbind(
  cdf_measured_category_votes(b, "Guard & Exit (default policy)"),
  cdf_measured_category_votes(b, "Exit (default policy)"),
  cdf_measured_category_votes(b, "Guard & Exit (non-default policy)"),
  cdf_measured_category_votes(b, "Exit (non-default policy)"),
  cdf_measured_category_votes(b, "Guard"),
  cdf_measured_category_votes(b, "Middle"))

# The plot uses the current ggplot2 API: `labels =` replaces the removed
# `formatter =` argument, ggtitle() + theme() replace the removed opts(),
# `show.legend` replaces the removed `legend` layer argument, and the
# colour mapping is passed as `values =`.
ggplot(measured_category_votes, aes(x = x, y = y, colour = source)) +
  geom_line() +
  facet_wrap(~ category, ncol = 3) +
  scale_x_log10("\nRatio of measured by self-reported bandwidth",
    limits = c(0.1, 10), breaks = c(0.1, 1, 10),
    labels = c("0.1", "1", "10")) +
  scale_y_continuous("Fraction of measured bandwidth\n", limits = c(0, 1),
    labels = function(x) paste0(x * 100, "%")) +
  # Consensus in solid black, each authority's vote semi-transparent.
  scale_colour_manual("", values = c("consensus" = "black",
    "urras" = alpha("purple", 0.5), "ides" = alpha("red", 0.5),
    "moria1" = alpha("green", 0.5), "gabelmoo" = alpha("blue", 0.5),
    "maatuska" = alpha("orange", 0.5))) +
  # Dotted reference line at ratio 1 (measured == self-reported).
  geom_vline(xintercept = 1, linetype = "dotted", show.legend = FALSE) +
  ggtitle(paste("Measured vs. self-reported bandwidth ratios in",
    "consensus and votes\n")) +
  theme(legend.position = "right")
# ggsave() without `plot =` writes the most recently created plot.
ggsave(filename = "bandwidth-comparison-measured-votes.png",
  width = 8, height = 5, dpi = 150)
# Dump the plotted data next to the image.
write.csv(measured_category_votes, "measured_category_votes-temp.csv",
  quote = FALSE, row.names = FALSE)