Use identifiers when manipulating weights instead of anonymous tuples
[compass/delber.git] / compass.py
blob a62eb98347f98b76b9962a8ab52fe47eebfc1e63
1 #!/usr/bin/env python
3 # This program is free software. It comes without any warranty, to
4 # the extent permitted by applicable law. You can redistribute it
5 # and/or modify it under the terms of the Do What The Fuck You Want
6 # To Public License, Version 2, as published by Sam Hocevar. See
7 # http://sam.zoy.org/wtfpl/COPYING for more details.
# Default thresholds used by FastExitFilter / SameNetworkFilter to decide
# whether a relay counts as a "fast exit".
FAST_EXIT_BANDWIDTH_RATE = 95 * 125 * 1024  # 95 Mbit/s
FAST_EXIT_ADVERTISED_BANDWIDTH = 5000 * 1024  # 5000 kB/s
FAST_EXIT_PORTS = [80, 443, 554, 1755]
FAST_EXIT_MAX_PER_NETWORK = 2

# Looser thresholds used for the "almost fast exit" selection.
ALMOST_FAST_EXIT_BANDWIDTH_RATE = 80 * 125 * 1024  # 80 Mbit/s
ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH = 2000 * 1024  # 2000 kB/s
ALMOST_FAST_EXIT_PORTS = [80, 443]
18 import json
19 import operator
20 import sys
21 import os
22 from optparse import OptionParser, OptionGroup
23 import urllib
24 import re
25 import itertools
class BaseFilter(object):
    """Common interface of all relay filters.

    A subclass normally overrides accept(); load() then keeps every relay
    for which accept() returns a true value.  Subclasses that need to look
    at the whole relay list at once override load() instead.
    """

    def accept(self, relay):
        """Tell whether a single relay passes the filter (abstract)."""
        raise NotImplementedError("This isn't implemented by the subclass")

    def load(self, relays):
        """Return the relays from `relays` that accept() lets through."""
        return [relay for relay in relays if self.accept(relay)]
class RunningFilter(BaseFilter):
    """Keep only the relays whose 'running' entry is true."""

    def accept(self, relay):
        """Return the relay's 'running' value (KeyError if it is absent)."""
        is_running = relay['running']
        return is_running
class FamilyFilter(BaseFilter):
    """Select the relays that form a family with one reference relay.

    The reference relay is looked up in `all_relays` either by its
    40-character fingerprint or, for relays carrying the 'Named' flag, by
    nickname.  A relay is accepted only when the family declaration is
    mutual: the reference relay lists it, and it lists the reference relay.

    The 'flags' and 'nickname' entries are read with .get() so that relays
    lacking them are simply treated as unnamed instead of raising KeyError.
    """

    def __init__(self, family, all_relays):
        """Locate the reference relay.

        family     -- fingerprint (40 chars) or nickname (< 20 chars).
        all_relays -- iterable of relay dicts to search.
        """
        self._family_fingerprint = None
        self._family_nickname = None
        self._family_relays = []
        found_relay = self._find_relay(family, all_relays)
        if found_relay is not None:
            self._family_fingerprint = '$%s' % found_relay['fingerprint']
            if 'Named' in found_relay.get('flags', []):
                self._family_nickname = found_relay.get('nickname')
            # The reference relay is always part of its own family.
            self._family_relays = [self._family_fingerprint] + found_relay.get('family', [])

    @staticmethod
    def _find_relay(family, all_relays):
        """Return the first relay matching `family`, or None."""
        for relay in all_relays:
            if len(family) == 40 and relay['fingerprint'] == family:
                return relay
            if (len(family) < 20
                    and 'Named' in relay.get('flags', [])
                    and relay.get('nickname') == family):
                return relay
        return None

    def accept(self, relay):
        """Return True when `relay` and the reference relay list each other."""
        fingerprint = '$%s' % relay['fingerprint']
        mentions = [fingerprint] + relay.get('family', [])
        # Only show families as accepted by consensus (mutually listed relays)
        listed = fingerprint in self._family_relays
        if not listed and 'Named' in relay.get('flags', []):
            listed = relay.get('nickname') in self._family_relays
        mentioned = self._family_fingerprint in mentions
        if not mentioned and self._family_nickname is not None:
            mentioned = self._family_nickname in mentions
        return listed and mentioned
class CountryFilter(BaseFilter):
    """Keep relays whose 'country' entry is one of the given country codes."""

    def __init__(self, countries=()):
        """Store the country codes lower-cased.

        The default is an immutable empty tuple (rather than a shared
        mutable list); None is treated like "no countries".
        """
        self._countries = [code.lower() for code in (countries or ())]

    def accept(self, relay):
        """Return True if the relay's country code is in the selection."""
        return relay.get('country', None) in self._countries
class ASFilter(BaseFilter):
    """Keep relays whose 'as_number' entry is one of the given ASes."""

    def __init__(self, as_sets=()):
        """Store the AS selection in normalized "AS<digits>" form.

        Accepts bare numbers ("3320") as well as prefixed ones in any case
        ("AS3320", "as3320").  Anything else is kept unchanged.  The
        default is an immutable empty tuple; None means "no ASes".
        """
        self._as_sets = [self._normalize(entry) for entry in (as_sets or ())]

    @staticmethod
    def _normalize(entry):
        """Return `entry` as "AS<digits>" when it denotes an AS number."""
        if entry.isdigit():
            return "AS" + entry
        if entry[:2].lower() == "as" and entry[2:].isdigit():
            return "AS" + entry[2:]
        return entry

    def accept(self, relay):
        """Return True if the relay's AS is in the selection."""
        return relay.get('as_number', None) in self._as_sets
class ExitFilter(BaseFilter):
    """Keep relays with a positive 'exit_probability'."""

    def accept(self, relay):
        """Return True if the relay has an exit probability above zero."""
        # A missing value counts as -1, i.e. the relay is rejected.
        probability = relay.get('exit_probability', -1)
        return probability > 0.0
class GuardFilter(BaseFilter):
    """Keep relays with a positive 'guard_probability'."""

    def accept(self, relay):
        """Return True if the relay has a guard probability above zero."""
        # A missing value counts as -1, i.e. the relay is rejected.
        probability = relay.get('guard_probability', -1)
        return probability > 0.0
class FastExitFilter(BaseFilter):
    """Keep relays that are fast enough and exit to all required ports.

    A relay matches when its 'bandwidth_rate' and 'advertised_bandwidth'
    reach the configured minimums and its 'exit_policy_summary' permits
    every port in `ports`.
    """

    class Relay(object):
        """Small holder for a relay dict plus its exit probability and
        fingerprint.  Not referenced by the filter methods of this class."""

        def __init__(self, relay):
            self.exit = relay.get('exit_probability')
            self.fp = relay.get('fingerprint')
            self.relay = relay

    def __init__(self, bandwidth_rate=FAST_EXIT_BANDWIDTH_RATE,
                 advertised_bandwidth=FAST_EXIT_ADVERTISED_BANDWIDTH,
                 ports=FAST_EXIT_PORTS):
        """Store the minimum bandwidth figures and the required ports."""
        self.bandwidth_rate = bandwidth_rate
        self.advertised_bandwidth = advertised_bandwidth
        self.ports = ports

    @staticmethod
    def _parse_port_ranges(portlist):
        """Turn entries such as "80" or "6660-6669" into (low, high) pairs."""
        ranges = []
        for entry in portlist:
            if '-' in entry:
                bounds = entry.split('-')
                ranges.append((int(bounds[0]), int(bounds[1])))
            else:
                port = int(entry)
                ranges.append((port, port))
        return ranges

    @staticmethod
    def _covers(ranges, port):
        """Return True if `port` lies inside any of the (low, high) ranges."""
        return any(low <= port <= high for low, high in ranges)

    def load(self, all_relays):
        """Return the relays meeting the bandwidth and port requirements."""
        # The required ports are the same for every relay: build them once.
        relevant_ports = set(self.ports)
        matching_relays = []
        for relay in all_relays:
            if relay.get('bandwidth_rate', -1) < self.bandwidth_rate:
                continue
            if relay.get('advertised_bandwidth', -1) < self.advertised_bandwidth:
                continue
            summary = relay.get('exit_policy_summary', {})
            if 'accept' not in summary and 'reject' not in summary:
                # No usable policy summary: cannot tell what the relay allows.
                continue
            # Ranges are compared as intervals instead of being expanded
            # into every single port number.  Each list is checked against
            # its own meaning, should a summary ever carry both keys.
            if 'accept' in summary:
                accepted = self._parse_port_ranges(summary['accept'])
                if not all(self._covers(accepted, port) for port in relevant_ports):
                    continue
            if 'reject' in summary:
                rejected = self._parse_port_ranges(summary['reject'])
                if any(self._covers(rejected, port) for port in relevant_ports):
                    continue
            matching_relays.append(relay)
        return matching_relays
class SameNetworkFilter(BaseFilter):
    """Limit the output of another filter to N relays per IPv4 /24 network.

    Within a network that is already full, a new relay replaces the member
    with the smallest 'exit_probability' if that member's probability is
    lower than the new relay's; otherwise the new relay is dropped.
    """

    def __init__(self, orig_filter, max_per_network=FAST_EXIT_MAX_PER_NETWORK):
        """Wrap `orig_filter`, keeping at most `max_per_network` relays
        per network."""
        self.orig_filter = orig_filter
        self.max_per_network = max_per_network

    @staticmethod
    def _exit_probability(relay):
        """Return the relay's exit probability, treating a missing one as 0."""
        return relay.get('exit_probability') or 0

    def load(self, all_relays):
        """Return the wrapped filter's relays, thinned out per network."""
        network_data = {}
        for relay in self.orig_filter.load(all_relays):
            or_addresses = relay.get("or_addresses") or []
            # Strip the ":port" suffix; IPv6 hosts still contain ':'.
            hosts = [address.rsplit(':', 1)[0] for address in or_addresses]
            ipv4_hosts = [host for host in hosts if ':' not in host]
            if len(ipv4_hosts) > 1:
                print("[WARNING] - %s has more than one IPv4 OR address - %s"
                      % (relay.get("fingerprint"), or_addresses))
            if ipv4_hosts:
                # Group on the first IPv4 address with its last octet removed.
                network = ipv4_hosts[0].rsplit('.', 1)[0]
            elif hosts:
                # No IPv4 address at all: fall back to the host string so
                # the relay is kept rather than silently discarded.
                network = hosts[0]
            else:
                # Without any address the relay cannot be assigned a network.
                continue

            if network not in network_data:
                network_data[network] = [relay]
                continue
            members = network_data[network]
            if len(members) < self.max_per_network:
                members.append(relay)
                continue
            # Network is full: evict the weakest member, but only if it is
            # weaker than the newcomer (ties keep the existing members).
            weakest = min(range(len(members)),
                          key=lambda index: self._exit_probability(members[index]))
            if self._exit_probability(members[weakest]) < self._exit_probability(relay):
                del members[weakest]
                members.append(relay)
        return list(itertools.chain.from_iterable(network_data.values()))
class InverseFilter(BaseFilter):
    """Complement of another filter: keep what the wrapped filter drops."""

    def __init__(self, orig_filter):
        """Remember the filter whose selection is to be inverted."""
        self.orig_filter = orig_filter

    def load(self, all_relays):
        """Return the relays of `all_relays` that the wrapped filter's
        load() did not return, in their original order."""
        excluded = self.orig_filter.load(all_relays)
        return [relay for relay in all_relays if relay not in excluded]
class RelayStats(object):
    """Load relay details, filter and group them, and render weight tables.

    The relay data is read lazily from 'details.json' located next to this
    script.  `options` is the object produced by the command-line parser;
    it selects which filters are applied and how relays are grouped.
    """

    # Per-relay weight fields that are summed per group, in column order.
    WEIGHTS = ['consensus_weight_fraction', 'advertised_bandwidth_fraction', 'guard_probability', 'middle_probability', 'exit_probability']

    def __init__(self, options):
        self._data = None
        self._filters = self._create_filters(options)
        self._get_group = self._get_group_function(options)
        self._relays = None

    @property
    def data(self):
        """Parsed content of details.json (loaded on first access)."""
        if self._data is None:
            path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'details.json')
            # Close the file deterministically once it has been parsed.
            with open(path) as details_file:
                self._data = json.load(details_file)
        return self._data

    @property
    def relays(self):
        """Dict mapping group key -> list of relays that passed all filters.

        Computed once; an empty result is cached as well.
        """
        if self._relays is not None:
            return self._relays
        self._relays = {}
        relays = self.data['relays']
        for relay_filter in self._filters:
            relays = relay_filter.load(relays)
        for relay in relays:
            self.add_relay(relay)
        return self._relays

    def _create_filters(self, options):
        """Build the list of filters requested by `options`, in the order
        in which they are applied."""
        filters = []
        if not options.inactive:
            filters.append(RunningFilter())
        if options.family:
            filters.append(FamilyFilter(options.family, self.data['relays']))
        if options.country:
            filters.append(CountryFilter(options.country))
        if options.ases:
            filters.append(ASFilter(options.ases))
        if options.exits_only:
            filters.append(ExitFilter())
        if options.guards_only:
            filters.append(GuardFilter())
        if options.fast_exits_only:
            filters.append(SameNetworkFilter(FastExitFilter()))
        if options.almost_fast_exits_only:
            # Almost-fast exits: meet the looser thresholds but are not in
            # the set of (network-limited) fast exits.
            filters.append(
                FastExitFilter(ALMOST_FAST_EXIT_BANDWIDTH_RATE, ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH,
                               ALMOST_FAST_EXIT_PORTS))
            filters.append(
                InverseFilter(SameNetworkFilter(FastExitFilter())))
        if options.fast_exits_only_any_network:
            filters.append(FastExitFilter())
        return filters

    def _get_group_function(self, options):
        """Return a function mapping a relay to the key it is grouped by."""
        if options.by_country and options.by_as:
            return lambda relay: (relay.get('country', None), relay.get('as_number', None))
        elif options.by_country:
            return lambda relay: relay.get('country', None)
        elif options.by_as:
            return lambda relay: relay.get('as_number', None)
        else:
            return lambda relay: relay.get('fingerprint')

    def add_relay(self, relay):
        """File `relay` under its group key in the relay dict."""
        key = self._get_group(relay)
        self._relays.setdefault(key, []).append(relay)

    @staticmethod
    def _percentages(weights):
        """Return the weights as a tuple of percentages in WEIGHTS order."""
        return tuple(weights[name] * 100.0 for name in RelayStats.WEIGHTS)

    @staticmethod
    def _sum_weights(groups):
        """Add up the weight dicts of (formatted line, weights) pairs."""
        totals = dict.fromkeys(RelayStats.WEIGHTS, 0)
        for _, weights in groups:
            for name in RelayStats.WEIGHTS:
                totals[name] += weights[name]
        return totals

    def format_and_sort_groups(self, grouped_relays, country=None, ases=None, by_country=False, by_as_number=False, links=False):
        """Render one table row per group and sort rows by consensus weight.

        grouped_relays -- dict of group key -> list of relay dicts.
        country, ases  -- the country / AS selections given by the user;
                          only used to decide whether a column of a grouped
                          row shows a placeholder.
        by_country, by_as_number -- whether relays were grouped that way.
        links          -- show Atlas links instead of fingerprints.

        Returns a list of (formatted row, summed weights dict) pairs with
        the highest 'consensus_weight_fraction' first.
        """
        if links:
            format_string = "%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%% %-19s %-78s %-5s %-5s %-2s %-9s"
        else:
            format_string = "%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%% %-19s %-40s %-5s %-5s %-2s %-9s"
        formatted_groups = {}
        for group in grouped_relays.values():
            if not group:
                # Nothing to describe for an empty group.
                continue
            group_weights = dict.fromkeys(RelayStats.WEIGHTS, 0)
            relays_in_group, exits_in_group, guards_in_group = 0, 0, 0
            ases_in_group = set()
            for relay in group:
                for weight in RelayStats.WEIGHTS:
                    group_weights[weight] += relay.get(weight, 0)
                # 'nickname' and 'flags' may be absent from a relay entry.
                flags = relay.get('flags', [])
                nickname = relay.get('nickname', 'Unnamed')
                fingerprint = relay['fingerprint'] if not links else "https://atlas.torproject.org/#details/%s" % relay['fingerprint']
                if 'Exit' in flags and 'BadExit' not in flags:
                    exit_info = 'Exit'
                    exits_in_group += 1
                else:
                    exit_info = '-'
                if 'Guard' in flags:
                    guard_info = 'Guard'
                    guards_in_group += 1
                else:
                    guard_info = '-'
                # Kept separate from the `country` parameter, which must
                # stay intact for the placeholder decision below.
                country_info = relay.get('country', '??')
                as_number = relay.get('as_number', '??')
                as_name = relay.get('as_name', '??')
                as_info = "%s %s" % (as_number, as_name)
                ases_in_group.add(as_info)
                relays_in_group += 1
            if by_country or by_as_number:
                # A grouped row shows counts instead of per-relay values.
                nickname = "*"
                fingerprint = "(%d relays)" % relays_in_group
                exit_info = "(%d)" % exits_in_group
                guard_info = "(%d)" % guards_in_group
                if not by_as_number and not ases:
                    as_info = "(%s)" % len(ases_in_group)
                if not by_country and not country:
                    country_info = "*"
            row_values = self._percentages(group_weights) + (
                nickname, fingerprint,
                exit_info, guard_info, country_info, as_info)
            formatted_groups[format_string % row_values] = group_weights
        sorted_groups = sorted(formatted_groups.items(), key=lambda gs: gs[1]['consensus_weight_fraction'])
        sorted_groups.reverse()
        return sorted_groups

    def print_groups(self, sorted_groups, count=10, by_country=False, by_as_number=False, short=False, links=False):
        """Return the table as a list of output lines.

        sorted_groups -- result of format_and_sort_groups().
        count         -- number of rows to show; negative means all.
        short         -- maximum length of the header and row lines; a
                         false value means no limit, True means 70.
        Remaining rows are summarised in an "other" line, and a total line
        is added when more than one group is selected and the selection
        covers less than 99.9% of the consensus weight.
        """
        if short is True:
            limit = 70
        else:
            # False/None/0 must not slice the lines down to nothing.
            limit = short or None
        output_string = []
        # NOTE(review): the header spacing does not line up with the
        # fixed-width row format -- confirm against the upstream file.
        if links:
            output_string.append(" CW adv_bw P_guard P_middle P_exit Nickname Link Exit Guard CC Autonomous System"[:limit])
        else:
            output_string.append(" CW adv_bw P_guard P_middle P_exit Nickname Fingerprint Exit Guard CC Autonomous System"[:limit])
        if count < 0:
            count = len(sorted_groups)
        for formatted_group, _ in sorted_groups[:count]:
            output_string.append(formatted_group[:limit])
        if len(sorted_groups) > count:
            if by_country and by_as_number:
                group_kind = "countries and ASes"
            elif by_country:
                group_kind = "countries"
            elif by_as_number:
                group_kind = "ASes"
            else:
                group_kind = "relays"
            other_weights = self._sum_weights(sorted_groups[count:])
            output_string.append("%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%% (%d other %s)" % (
                self._percentages(other_weights) + (len(sorted_groups) - count, group_kind)))
        selection_weights = self._sum_weights(sorted_groups)
        if len(sorted_groups) > 1 and selection_weights['consensus_weight_fraction'] < 0.999:
            output_string.append("%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%% (total in selection)" %
                                 self._percentages(selection_weights))
        return output_string
def create_option_parser():
    """Build the command-line parser.

    Returns an optparse.OptionParser with the download switch and three
    option groups: filtering, grouping and display.
    """
    # Values shared by the help texts of the fast-exit options.
    fast_mbit = FAST_EXIT_BANDWIDTH_RATE / (125 * 1024)
    fast_kbyte = FAST_EXIT_ADVERTISED_BANDWIDTH / 1024
    fast_ports = '/'.join(map(str, FAST_EXIT_PORTS))
    almost_mbit = ALMOST_FAST_EXIT_BANDWIDTH_RATE / (125 * 1024)
    almost_kbyte = ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH / 1024
    almost_ports = '/'.join(map(str, ALMOST_FAST_EXIT_PORTS))

    parser = OptionParser()
    parser.add_option("-d", "--download", action="store_true",
                      help="download details.json from Onionoo service")

    filtering = OptionGroup(parser, "Filtering options")
    filtering.add_option("-i", "--inactive", action="store_true", default=False,
                         help="include relays in selection that aren't currently running")
    filtering.add_option("-a", "--as", dest="ases", action="append",
                         help="select only relays from autonomous system number AS",
                         metavar="AS")
    filtering.add_option("-c", "--country", action="append",
                         help="select only relays from country with code CC", metavar="CC")
    filtering.add_option("-e", "--exits-only", action="store_true",
                         help="select only relays suitable for exit position")
    filtering.add_option("-f", "--family", action="store", type="string", metavar="RELAY",
                         help="select family by fingerprint or nickname (for named relays)")
    filtering.add_option("-g", "--guards-only", action="store_true",
                         help="select only relays suitable for guard position")
    filtering.add_option("--fast-exits-only", action="store_true",
                         help="select only fast exits (%d+ Mbit/s, %d+ KB/s, %s, %d- per /24)" %
                              (fast_mbit, fast_kbyte, fast_ports,
                               FAST_EXIT_MAX_PER_NETWORK))
    filtering.add_option("--almost-fast-exits-only", action="store_true",
                         help="select only almost fast exits (%d+ Mbit/s, %d+ KB/s, %s, not in set of fast exits)" %
                              (almost_mbit, almost_kbyte, almost_ports))
    filtering.add_option("--fast-exits-only-any-network", action="store_true",
                         help="select only fast exits without network restriction (%d+ Mbit/s, %d+ KB/s, %s)" %
                              (fast_mbit, fast_kbyte, fast_ports))
    parser.add_option_group(filtering)

    grouping = OptionGroup(parser, "Grouping options")
    grouping.add_option("-A", "--by-as", action="store_true", default=False,
                        help="group relays by AS")
    grouping.add_option("-C", "--by-country", action="store_true", default=False,
                        help="group relays by country")
    parser.add_option_group(grouping)

    display = OptionGroup(parser, "Display options")
    display.add_option("-l", "--links", action="store_true",
                       help="display links to the Atlas service instead of fingerprints")
    display.add_option("-t", "--top", type="int", default=10, metavar="NUM",
                       help="display only the top results (default: %default; -1 for all)")
    display.add_option("-s", "--short", action="store_true",
                       help="cut the length of the line output at 70 chars")
    parser.add_option_group(display)
    return parser
def download_details_file():
    """Fetch the relay details from Onionoo into details.json.

    The file is written next to this script.  The response is read in full
    before the file is opened, so a failed download does not truncate an
    existing details.json, and both handles are closed even on error.
    """
    response = urllib.urlopen('https://onionoo.torproject.org/details?type=relay')
    try:
        details = response.read()
    finally:
        response.close()
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'details.json')
    with open(path, 'w') as details_file:
        details_file.write(details)
if '__main__' == __name__:
    # Command-line entry point: validate the options, optionally download
    # the relay details, then print the requested table.
    parser = create_option_parser()
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error("Did not understand positional argument(s), use options instead.")
    if options.family and not re.match(r'^[A-F0-9]{40}$', options.family) and not re.match(r'^[A-Za-z0-9]{1,19}$', options.family):
        parser.error("Not a valid fingerprint or nickname: %s" % options.family)
    # The three fast-exit selections are mutually exclusive.
    fast_exit_options = sum(1 for chosen in (options.fast_exits_only,
                                             options.almost_fast_exits_only,
                                             options.fast_exits_only_any_network)
                            if chosen)
    if fast_exit_options > 1:
        parser.error("Can only filter by one fast-exit option.")
    if options.download:
        download_details_file()
        print("Downloaded details.json. Re-run without --download option.")
        # sys.exit() instead of the interactive-shell helper exit().
        sys.exit(0)
    details_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'details.json')
    if not os.path.exists(details_path):
        parser.error("Did not find details.json. Re-run with --download.")
    stats = RelayStats(options)
    sorted_groups = stats.format_and_sort_groups(stats.relays,
                                                 country=options.country,
                                                 ases=options.ases,
                                                 by_country=options.by_country,
                                                 by_as_number=options.by_as,
                                                 links=options.links)
    output_string = stats.print_groups(sorted_groups, options.top,
                                       by_country=options.by_country,
                                       by_as_number=options.by_as,
                                       short=70 if options.short else None,
                                       links=options.links)
    print('\n'.join(output_string))