Set options based on request args
[compass/delber.git] / compass.py
blob428812fe7ffde0ddef73c457d81e329f1b079da3
1 #!/usr/bin/env python
3 # This program is free software. It comes without any warranty, to
4 # the extent permitted by applicable law. You can redistribute it
5 # and/or modify it under the terms of the Do What The Fuck You Want
6 # To Public License, Version 2, as published by Sam Hocevar. See
7 # http://sam.zoy.org/wtfpl/COPYING for more details.
9 import json
10 import operator
11 import sys
12 import os
13 from optparse import OptionParser, OptionGroup
14 import urllib
15 import re
16 from abc import abstractmethod
class BaseFilter(object):
    """Common base for relay filters.

    Subclasses implement ``accept`` to decide whether one relay is kept;
    ``load`` applies that decision to a whole collection of relays.
    """

    # NOTE: the class does not use ABCMeta, so this decorator documents the
    # intent but does not prevent instantiation.
    @abstractmethod
    def accept(self, relay):
        """Return a truthy value if ``relay`` should be kept."""
        pass

    def load(self, relays):
        """Return a list of the relays from ``relays`` that ``accept`` keeps.

        A real list is built (rather than relying on ``filter``, which is a
        one-shot iterator on Python 3) so callers can iterate the result
        several times.
        """
        return [relay for relay in relays if self.accept(relay)]
class RunningFilter(BaseFilter):
    """Keep only relays whose 'running' entry is truthy."""

    def accept(self, relay):
        """Return the relay's 'running' value (KeyError if it is absent)."""
        is_running = relay['running']
        return is_running
class FamilyFilter(BaseFilter):
    """Keep only relays that form a mutually declared family with one relay.

    The reference relay is looked up in ``all_relays`` either by its
    40-character fingerprint or, for relays carrying the 'Named' flag, by
    nickname (fewer than 20 characters).  If no reference relay is found the
    filter accepts nothing.
    """

    def __init__(self, family, all_relays):
        """Locate the reference relay and remember its family declaration.

        family     -- fingerprint or nickname identifying the reference relay
        all_relays -- iterable of relay dicts to search
        """
        self._family_fingerprint = None
        self._family_nickname = None
        self._family_relays = []
        found_relay = None
        by_fingerprint = len(family) == 40
        by_nickname = len(family) < 20
        for relay in all_relays:
            if by_fingerprint and relay['fingerprint'] == family:
                found_relay = relay
                break
            # 'flags' and 'nickname' may be missing from a relay document, so
            # read them defensively instead of indexing.
            if (by_nickname
                    and 'Named' in relay.get('flags', [])
                    and relay.get('nickname') == family):
                found_relay = relay
                break
        if found_relay:
            self._family_fingerprint = '$%s' % found_relay['fingerprint']
            if 'Named' in found_relay.get('flags', []):
                self._family_nickname = found_relay.get('nickname')
            self._family_relays = ([self._family_fingerprint]
                                   + found_relay.get('family', []))

    def accept(self, relay):
        """Return True if ``relay`` and the reference relay list each other."""
        fingerprint = '$%s' % relay['fingerprint']
        mentions = [fingerprint] + relay.get('family', [])
        # Only show families as accepted by consensus (mutually listed relays)
        # "listed": the reference relay names this relay (by fingerprint, or
        # by nickname when this relay is Named).
        listed = fingerprint in self._family_relays
        if not listed and 'Named' in relay.get('flags', []):
            listed = relay.get('nickname') in self._family_relays
        # "mentioned": this relay names the reference relay.
        mentioned = (self._family_fingerprint in mentions
                     or self._family_nickname in mentions)
        return bool(listed and mentioned)
class CountryFilter(BaseFilter):
    """Keep only relays whose 'country' is one of the given country codes."""

    def __init__(self, countries=None):
        """Store the lower-cased country codes to match.

        countries -- iterable of country code strings; None or empty means
                     no relay is accepted.
        """
        # None replaces the former mutable ``[]`` default; both mean "empty".
        self._countries = [code.lower() for code in (countries or ())]

    def accept(self, relay):
        """Return True if the relay's 'country' is among the stored codes."""
        return relay.get('country', None) in self._countries
class ASFilter(BaseFilter):
    """Keep only relays whose 'as_number' is one of the given AS identifiers."""

    def __init__(self, as_sets=None):
        """Store the AS identifiers to match.

        as_sets -- iterable of strings; purely numeric entries get an "AS"
                   prefix, everything else is used verbatim.  None or empty
                   means no relay is accepted.
        """
        # None replaces the former mutable ``[]`` default; both mean "empty".
        self._as_sets = ["AS" + entry if entry.isdigit() else entry
                         for entry in (as_sets or ())]

    def accept(self, relay):
        """Return True if the relay's 'as_number' is among the stored ones."""
        return relay.get('as_number', None) in self._as_sets
class ExitFilter(BaseFilter):
    """Keep only relays with a positive 'exit_probability'."""

    def accept(self, relay):
        """Return True if 'exit_probability' is above zero (missing counts as -1)."""
        probability = relay.get('exit_probability', -1)
        return probability > 0.0
class GuardFilter(BaseFilter):
    """Keep only relays with a positive 'guard_probability'."""

    def accept(self, relay):
        """Return True if 'guard_probability' is above zero (missing counts as -1)."""
        probability = relay.get('guard_probability', -1)
        return probability > 0.0
class FastExitFilter(BaseFilter):
    """Select exits by bandwidth, permitted ports and density per /24 network.

    A relay matches when its 'bandwidth_rate' and 'advertised_bandwidth' reach
    the configured minimums and its exit policy summary permits every port in
    ``ports``.  With ``same_network`` set, at most two matching relays are
    kept per IPv4 /24 network (those with the highest exit probability).
    With ``inverse`` set, ``load`` returns the relays that do NOT match.
    """

    class Relay(object):
        """Small record used while applying the per-/24 limit."""

        def __init__(self, relay):
            # A missing probability is stored as -1 so that comparisons are
            # well defined on every Python version and such relays rank last.
            self.exit = relay.get('exit_probability', -1)
            self.fp = relay.get('fingerprint')
            self.relay = relay

    def __init__(self, bandwidth_rate, advertised_bandwidth, ports, same_network, inverse=False):
        """Store the selection criteria.

        bandwidth_rate       -- minimum value of the relay's 'bandwidth_rate'
        advertised_bandwidth -- minimum value of 'advertised_bandwidth'
        ports                -- ports the exit policy must permit
        same_network         -- if true, keep at most two relays per /24
        inverse              -- if true, return the non-matching relays
        """
        self.bandwidth_rate = bandwidth_rate
        self.advertised_bandwidth = advertised_bandwidth
        self.ports = ports
        self.same_network = same_network
        self.inverse = inverse

    def load(self, all_relays):
        """Return the matching relays, or the complement if ``inverse`` is set."""
        # The input is walked twice when ``inverse`` is set, so make sure it
        # is a real list and not a one-shot iterator.
        all_relays = list(all_relays)
        # First, filter relays based on bandwidth and port requirements.
        matching_relays = [relay for relay in all_relays
                           if self._meets_requirements(relay)]
        # Second, filter relays based on same /24 requirement.
        if self.same_network:
            matching_relays = self._limit_per_network(matching_relays)
        # Either return relays meeting all requirements, or the inverse set.
        if self.inverse:
            return [relay for relay in all_relays
                    if relay not in matching_relays]
        return matching_relays

    @staticmethod
    def _parse_port_ranges(portlist):
        """Turn entries such as '80' or '8000-8100' into (low, high) int pairs."""
        ranges = []
        for entry in portlist:
            first, _, last = entry.partition('-')
            ranges.append((int(first), int(last or first)))
        return ranges

    def _meets_requirements(self, relay):
        """Return True if ``relay`` satisfies the bandwidth and port criteria."""
        if relay.get('bandwidth_rate', -1) < self.bandwidth_rate:
            return False
        if relay.get('advertised_bandwidth', -1) < self.advertised_bandwidth:
            return False
        summary = relay.get('exit_policy_summary', {})
        if 'accept' in summary:
            accepting = True
            portlist = summary['accept']
        elif 'reject' in summary:
            accepting = False
            portlist = summary['reject']
        else:
            # No usable policy summary: the relay cannot qualify.
            return False
        # Compare against ranges directly instead of expanding each range
        # into a list of every port number it covers.
        ranges = self._parse_port_ranges(portlist)

        def listed(port):
            return any(low <= port <= high for low, high in ranges)

        if accepting:
            # An accept list must cover every required port ...
            return all(listed(port) for port in self.ports)
        # ... while a reject list must not touch any of them.
        return not any(listed(port) for port in self.ports)

    def _limit_per_network(self, relays):
        """Keep at most two relays per IPv4 /24, preferring high exit probability."""
        network_data = {}
        # Relays without any IPv4 OR address cannot be assigned to a /24 and
        # are therefore kept without restriction.
        unrestricted = []
        for relay in relays:
            or_addresses = relay.get("or_addresses") or []
            ipv4_addresses = []
            for address in or_addresses:
                host = address.rsplit(':', 1)[0]
                # skip if ipv6
                if ':' in host:
                    continue
                ipv4_addresses.append(host)
            if len(ipv4_addresses) > 1:
                print("[WARNING] - %s has more than one IPv4 OR address - %s"
                      % (relay.get("fingerprint"), or_addresses))
            if not ipv4_addresses:
                unrestricted.append(relay)
                continue
            # Use the first IPv4 address listed; drop the last octet to get
            # the /24 prefix.
            network = ipv4_addresses[0].rsplit('.', 1)[0]
            relay_info = self.Relay(relay)
            members = network_data.setdefault(network, [])
            if len(members) > 1:
                # The network is full: replace its weakest member, but only
                # if the current relay has a strictly higher exit probability.
                weakest = min(range(len(members)),
                              key=lambda index: members[index].exit)
                if members[weakest].exit < relay_info.exit:
                    del members[weakest]
                    members.append(relay_info)
            else:
                members.append(relay_info)
        limited = []
        for members in network_data.values():
            limited.extend(info.relay for info in members)
        limited.extend(unrestricted)
        return limited
class RelayStats(object):
    """Load relay details, filter and group them, and render weight tables."""

    def __init__(self, options):
        """Build filters and the grouping function from parsed options.

        options -- object with the attributes read by ``_create_filters`` and
                   ``_get_group_function`` (as produced by the option parser).
        """
        # _data must exist before _create_filters runs: the family filter
        # reads self.data while being constructed.
        self._data = None
        self._filters = self._create_filters(options)
        self._get_group = self._get_group_function(options)
        self._relays = None

    @property
    def data(self):
        """Parsed contents of details.json (next to this script), loaded once."""
        if self._data is None:
            path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'details.json')
            with open(path) as details_file:
                self._data = json.load(details_file)
        return self._data

    @property
    def relays(self):
        """Dict mapping group key to list of relays that passed all filters.

        Computed on first access and cached afterwards, including when the
        result is empty.
        """
        if self._relays is not None:
            return self._relays
        self._relays = {}
        relays = self.data['relays']
        for relay_filter in self._filters:
            relays = relay_filter.load(relays)
        for relay in relays:
            self.add_relay(relay)
        return self._relays

    def _create_filters(self, options):
        """Return the list of filter objects selected by ``options``."""
        filters = []
        if not options.inactive:
            filters.append(RunningFilter())
        if options.family:
            filters.append(FamilyFilter(options.family, self.data['relays']))
        if options.country:
            filters.append(CountryFilter(options.country))
        if options.ases:
            filters.append(ASFilter(options.ases))
        if options.exits_only:
            filters.append(ExitFilter())
        if options.guards_only:
            filters.append(GuardFilter())
        # FastExitFilter arguments: bandwidth rate, advertised bandwidth,
        # ports, same /24 restriction, inverse.
        if options.fast_exits_only:
            filters.append(FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], True, False))
        if options.almost_fast_exits_only:
            # Relays meeting the looser limits, minus those that already
            # count as fast exits (second filter is inverted).
            filters.append(FastExitFilter(80 * 125 * 1024, 2000 * 1024, [80, 443], False, False))
            filters.append(FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], True, True))
        if options.fast_exits_only_any_network:
            filters.append(FastExitFilter(95 * 125 * 1024, 5000 * 1024, [80, 443, 554, 1755], False, False))
        return filters

    def _get_group_function(self, options):
        """Return a function mapping a relay to its group key."""
        if options.by_country and options.by_as:
            return lambda relay: (relay.get('country', None), relay.get('as_number', None))
        elif options.by_country:
            return lambda relay: relay.get('country', None)
        elif options.by_as:
            return lambda relay: relay.get('as_number', None)
        else:
            # No grouping requested: every relay is its own group.
            return lambda relay: relay.get('fingerprint')

    def add_relay(self, relay):
        """Append ``relay`` to the group selected by the grouping function."""
        key = self._get_group(relay)
        self._relays.setdefault(key, []).append(relay)

    def format_and_sort_groups(self, grouped_relays, country=None, ases=None, by_country=False, by_as_number=False, links=False):
        """Format each group as one table row and sort rows by weight.

        grouped_relays -- dict mapping group key to a list of relay dicts
        country        -- country filter in effect (falsy if none)
        ases           -- AS filter in effect (falsy if none)
        by_country     -- True if relays are grouped by country
        by_as_number   -- True if relays are grouped by AS
        links          -- show Atlas links instead of fingerprints

        Returns a list of (formatted_row, weights) tuples in descending order
        of weights, where weights is the 5-tuple of summed consensus weight,
        advertised bandwidth, guard, middle and exit fractions.
        """
        if links:
            format_string = "%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%% %-19s %-78s %-5s %-5s %-2s %-9s"
        else:
            format_string = "%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%% %-19s %-40s %-5s %-5s %-2s %-9s"
        formatted_groups = {}
        for group in grouped_relays.values():
            if not group:
                # Nothing to describe; an empty group has no row.
                continue
            group_weights = (0, 0, 0, 0, 0)
            relays_in_group, exits_in_group, guards_in_group = 0, 0, 0
            ases_in_group = set()
            for relay in group:
                weights = (relay.get('consensus_weight_fraction', 0),
                           relay.get('advertised_bandwidth_fraction', 0),
                           relay.get('guard_probability', 0),
                           relay.get('middle_probability', 0),
                           relay.get('exit_probability', 0))
                group_weights = tuple(sum(x) for x in zip(group_weights, weights))
                # 'nickname' and 'flags' may be absent from a relay document.
                nickname = relay.get('nickname', 'Unnamed')
                flags = relay.get('flags', [])
                if links:
                    fingerprint = "https://atlas.torproject.org/#details/%s" % relay['fingerprint']
                else:
                    fingerprint = relay['fingerprint']
                if 'Exit' in flags and 'BadExit' not in flags:
                    exit_label = 'Exit'
                    exits_in_group += 1
                else:
                    exit_label = '-'
                if 'Guard' in flags:
                    guard_label = 'Guard'
                    guards_in_group += 1
                else:
                    guard_label = '-'
                # Kept in its own name so the ``country`` argument (the
                # filter in effect) is still available below.
                country_label = relay.get('country', '??')
                as_number = relay.get('as_number', '??')
                as_name = relay.get('as_name', '??')
                as_info = "%s %s" % (as_number, as_name)
                ases_in_group.add(as_info)
                relays_in_group += 1
            if by_country or by_as_number:
                # Aggregated row: per-relay columns become counts/wildcards.
                nickname = "*"
                fingerprint = "(%d relays)" % relays_in_group
                exit_label = "(%d)" % exits_in_group
                guard_label = "(%d)" % guards_in_group
                if not by_as_number and not ases:
                    as_info = "(%s)" % len(ases_in_group)
                if not by_country and not country:
                    country_label = "*"
            formatted_group = format_string % (
                group_weights[0] * 100.0,
                group_weights[1] * 100.0,
                group_weights[2] * 100.0,
                group_weights[3] * 100.0,
                group_weights[4] * 100.0,
                nickname, fingerprint,
                exit_label, guard_label, country_label, as_info)
            formatted_groups[formatted_group] = group_weights
        sorted_groups = sorted(formatted_groups.items(), key=operator.itemgetter(1))
        sorted_groups.reverse()
        return sorted_groups

    def print_groups(self, sorted_groups, count=10, by_country=False, by_as_number=False, short=False, links=False):
        """Return the table as a list of output lines.

        sorted_groups -- result of ``format_and_sort_groups``
        count         -- number of rows to show; negative shows all
        by_country    -- True if relays are grouped by country
        by_as_number  -- True if relays are grouped by AS
        short         -- maximum line length; any falsy value disables cutting
        links         -- use the wider "Link" column instead of "Fingerprint"
        """
        # A falsy ``short`` (including the False default) means "do not cut";
        # slicing with False would otherwise yield empty lines.
        limit = short if short else None
        # Column widths mirror the row format so the header lines up.
        if links:
            header = "%9s %9s %9s %9s %9s %-19s %-78s %-5s %-5s %-2s %s" % (
                "CW", "adv_bw", "P_guard", "P_middle", "P_exit",
                "Nickname", "Link", "Exit", "Guard", "CC", "Autonomous System")
        else:
            header = "%9s %9s %9s %9s %9s %-19s %-40s %-5s %-5s %-2s %s" % (
                "CW", "adv_bw", "P_guard", "P_middle", "P_exit",
                "Nickname", "Fingerprint", "Exit", "Guard", "CC", "Autonomous System")
        output_string = [header[:limit]]
        if count < 0:
            count = len(sorted_groups)
        for formatted_group, _ in sorted_groups[:count]:
            output_string.append(formatted_group[:limit])
        if len(sorted_groups) > count:
            if by_country and by_as_number:
                group_kind = "countries and ASes"
            elif by_country:
                group_kind = "countries"
            elif by_as_number:
                group_kind = "ASes"
            else:
                group_kind = "relays"
            # Summarise everything that was cut off in a single row.
            other_weights = (0, 0, 0, 0, 0)
            for _, weights in sorted_groups[count:]:
                other_weights = tuple(sum(x) for x in zip(other_weights, weights))
            output_string.append("%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%% (%d other %s)" % (
                other_weights[0] * 100.0, other_weights[1] * 100.0,
                other_weights[2] * 100.0, other_weights[3] * 100.0,
                other_weights[4] * 100.0, len(sorted_groups) - count, group_kind))
        selection_weights = (0, 0, 0, 0, 0)
        for _, weights in sorted_groups:
            selection_weights = tuple(sum(x) for x in zip(selection_weights, weights))
        # Only print a total when it adds information: more than one row and
        # the selection covers less than (about) the whole network.
        if len(sorted_groups) > 1 and selection_weights[0] < 0.999:
            output_string.append("%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%% (total in selection)" % (
                selection_weights[0] * 100.0, selection_weights[1] * 100.0,
                selection_weights[2] * 100.0, selection_weights[3] * 100.0,
                selection_weights[4] * 100.0))
        return output_string
def create_option_parser():
    """Build the command-line parser.

    Returns an ``OptionParser`` with the download switch plus three option
    groups: filtering, grouping and display options.
    """
    parser = OptionParser()
    parser.add_option("-d", "--download", action="store_true",
                      help="download details.json from Onionoo service")

    filtering = OptionGroup(parser, "Filtering options")
    filtering.add_option("-i", "--inactive", action="store_true", default=False,
                         help="include relays in selection that aren't currently running")
    filtering.add_option("-a", "--as", dest="ases", action="append",
                         help="select only relays from autonomous system number AS",
                         metavar="AS")
    filtering.add_option("-c", "--country", action="append",
                         help="select only relays from country with code CC", metavar="CC")
    filtering.add_option("-e", "--exits-only", action="store_true",
                         help="select only relays suitable for exit position")
    filtering.add_option("-f", "--family", action="store", type="string", metavar="RELAY",
                         help="select family by fingerprint or nickname (for named relays)")
    filtering.add_option("-g", "--guards-only", action="store_true",
                         help="select only relays suitable for guard position")
    filtering.add_option("--fast-exits-only", action="store_true",
                         help="select only fast exits (95+ Mbit/s, 5000+ KB/s, 80/443/554/1755, 2- per /24)")
    filtering.add_option("--almost-fast-exits-only", action="store_true",
                         help="select only almost fast exits (80+ Mbit/s, 2000+ KB/s, 80/443, not in set of fast exits)")
    # The closing parenthesis was missing from this help text.
    filtering.add_option("--fast-exits-only-any-network", action="store_true",
                         help="select only fast exits without network restriction (95+ Mbit/s, 5000+ KB/s, 80/443/554/1755)")
    parser.add_option_group(filtering)

    grouping = OptionGroup(parser, "Grouping options")
    grouping.add_option("-A", "--by-as", action="store_true", default=False,
                        help="group relays by AS")
    grouping.add_option("-C", "--by-country", action="store_true", default=False,
                        help="group relays by country")
    parser.add_option_group(grouping)

    display = OptionGroup(parser, "Display options")
    display.add_option("-l", "--links", action="store_true",
                       help="display links to the Atlas service instead of fingerprints")
    display.add_option("-t", "--top", type="int", default=10, metavar="NUM",
                       help="display only the top results (default: %default; -1 for all)")
    display.add_option("-s", "--short", action="store_true",
                       help="cut the length of the line output at 70 chars")
    parser.add_option_group(display)
    return parser
def download_details_file():
    """Fetch relay details from Onionoo and store them as details.json.

    The file is written next to this script.  The response is read completely
    before the target file is opened, so a failed download does not truncate
    an existing details.json.
    """
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    target = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'details.json')
    # closing() guarantees the connection is released even if read() fails,
    # and works for response objects that are not context managers.
    with closing(urlopen('https://onionoo.torproject.org/details?type=relay')) as response:
        payload = response.read()
    # Binary mode: the payload is written exactly as received.
    with open(target, 'wb') as details_file:
        details_file.write(payload)
# Command-line entry point: validate options, optionally download the relay
# details, then print the requested table.
if '__main__' == __name__:
    parser = create_option_parser()
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error("Did not understand positional argument(s), use options instead.")
    # A family is given either as a 40-digit upper-case hex fingerprint or as
    # a nickname of 1-19 alphanumeric characters.
    if options.family and not re.match(r'^[A-F0-9]{40}$', options.family) and not re.match(r'^[A-Za-z0-9]{1,19}$', options.family):
        parser.error("Not a valid fingerprint or nickname: %s" % options.family)
    # The three fast-exit filters are mutually exclusive.
    fast_exit_options = 0
    if options.fast_exits_only:
        fast_exit_options += 1
    if options.almost_fast_exits_only:
        fast_exit_options += 1
    if options.fast_exits_only_any_network:
        fast_exit_options += 1
    if fast_exit_options > 1:
        parser.error("Can only filter by one fast-exit option.")
    if options.download:
        download_details_file()
        print("Downloaded details.json. Re-run without --download option.")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to be defined.
        sys.exit()
    if not os.path.exists(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'details.json')):
        parser.error("Did not find details.json. Re-run with --download.")
    stats = RelayStats(options)
    sorted_groups = stats.format_and_sort_groups(stats.relays,
                                                 country=options.country,
                                                 ases=options.ases,
                                                 by_country=options.by_country,
                                                 by_as_number=options.by_as,
                                                 links=options.links)
    # --short cuts every line at 70 characters; None means no cutting.
    output_string = stats.print_groups(sorted_groups, options.top,
                                       by_country=options.by_country,
                                       by_as_number=options.by_as,
                                       short=70 if options.short else None,
                                       links=options.links)
    print('\n'.join(output_string))