# task-6329: move OptionParser creation to its own function
# [tor-metrics-tasks/delber.git] / task-6329 / tor-relays-stats.py
# blob d966f66b8aa95b67b6440085c2dcb5a0f8759e1a
#!/usr/bin/env python
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
import contextlib
import json
import operator
import os.path
import re
import sys
import urllib
from abc import abstractmethod
from optparse import OptionParser, OptionGroup
class BaseFilter(object):
    """Common interface of the relay filters.

    A filter is asked about one relay entry at a time and answers
    whether that relay stays in the selection.
    """

    @abstractmethod
    def accept(self, relay):
        """Return a true value if *relay* passes the filter.

        Subclasses must override this method.  The class has no ABCMeta
        metaclass, so the @abstractmethod marker alone does not prevent
        instantiation; raising here makes a missing override fail loudly
        instead of returning None and rejecting every relay.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError(
            "%s must implement accept()" % type(self).__name__)
class RunningFilter(BaseFilter):
    """Keep only relays whose 'running' entry is true."""

    def accept(self, relay):
        """Return the relay's 'running' value.

        Raises:
            KeyError: if the relay entry has no 'running' key.
        """
        return relay['running']
class FamilyFilter(BaseFilter):
    """Keep only relays that belong to the family of one reference relay.

    A relay is considered part of the family when the reference relay
    lists it (by '$fingerprint', or by nickname if the relay carries the
    'Named' flag), or when the relay itself lists the reference relay.
    """

    def __init__(self, family, all_relays):
        """Find the reference relay and record how its family is named.

        Args:
            family: 40-character fingerprint of the reference relay, or a
                nickname shorter than 20 characters.  Nicknames are only
                matched against relays carrying the 'Named' flag.
            all_relays: iterable of relay entries to search.

        If no relay matches, the filter keeps its empty defaults and
        accept() returns False for every relay.
        """
        self._family_fingerprint = None
        self._family_nickname = None
        self._family_relays = []

        found_relay = self._find_relay(family, all_relays)
        if not found_relay:
            return

        self._family_fingerprint = '$%s' % found_relay['fingerprint']
        if 'Named' in found_relay.get('flags', []):
            self._family_nickname = found_relay['nickname']
        self._family_relays = ([self._family_fingerprint]
                               + found_relay.get('family', []))

    @staticmethod
    def _find_relay(family, all_relays):
        """Return the first relay identified by *family*, or None."""
        for relay in all_relays:
            if len(family) == 40 and relay['fingerprint'] == family:
                return relay
            # Entries without a 'flags' list are treated as unflagged
            # rather than aborting the whole search with a KeyError.
            if (len(family) < 20
                    and 'Named' in relay.get('flags', [])
                    and relay['nickname'] == family):
                return relay
        return None

    def accept(self, relay):
        """Return True if *relay* is in the reference relay's family."""
        fingerprint = '$%s' % relay['fingerprint']
        mentions = [fingerprint] + relay.get('family', [])

        # Listed by the reference relay, by fingerprint or by nickname.
        if fingerprint in self._family_relays:
            return True
        if ('Named' in relay.get('flags', [])
                and relay['nickname'] in self._family_relays):
            return True

        # The relay lists the reference relay.  Both attributes stay None
        # when no reference relay was found, which never matches a string.
        return (self._family_fingerprint in mentions
                or self._family_nickname in mentions)
class CountryFilter(BaseFilter):
    """Keep only relays located in one of the given countries."""

    def __init__(self, countries=()):
        """Remember the wanted country codes, lower-cased.

        Args:
            countries: iterable of country-code strings; the default is
                an immutable empty tuple (no relay is accepted).
        """
        self._countries = frozenset(code.lower() for code in countries)

    def accept(self, relay):
        """Return True if the relay's 'country' is one of the wanted codes.

        Relays without a 'country' entry are rejected.
        """
        return relay.get('country', None) in self._countries
class ASFilter(BaseFilter):
    """Keep only relays hosted in one of the given autonomous systems."""

    def __init__(self, as_sets=()):
        """Remember the wanted AS identifiers.

        Args:
            as_sets: iterable of strings.  A purely numeric entry such as
                "3320" is stored as "AS3320"; any other entry is stored
                unchanged.  The default is an immutable empty tuple (no
                relay is accepted).
        """
        self._as_sets = frozenset(
            "AS" + entry if entry.isdigit() else entry
            for entry in as_sets)

    def accept(self, relay):
        """Return True if the relay's 'as_number' is one of the wanted ASes.

        Relays without an 'as_number' entry are rejected.
        """
        return relay.get('as_number', None) in self._as_sets
class ExitFilter(BaseFilter):
    """Keep only relays with a positive exit probability."""

    def accept(self, relay):
        """Return True if 'exit_probability' is present and above zero."""
        # A missing value defaults to -1 and therefore never passes.
        return relay.get('exit_probability', -1) > 0.0
class GuardFilter(BaseFilter):
    """Keep only relays with a positive guard probability."""

    def accept(self, relay):
        """Return True if 'guard_probability' is present and above zero."""
        # A missing value defaults to -1 and therefore never passes.
        return relay.get('guard_probability', -1) > 0.0
class FastExitFilter(BaseFilter):
    """Keep only high-bandwidth exits that allow a fixed set of ports.

    A relay passes when its 'bandwidth_rate' and 'advertised_bandwidth'
    reach the class thresholds and its 'exit_policy_summary' permits
    every port in RELEVANT_PORTS.
    """

    # NOTE(review): presumably bytes per second -- 12500 * 1024 is about
    # the "100+ Mbit/s" that the --fast-exits-only help text mentions.
    MIN_BANDWIDTH_RATE = 12500 * 1024
    MIN_ADVERTISED_BANDWIDTH = 5000 * 1024
    RELEVANT_PORTS = (80, 443, 554, 1755)

    @staticmethod
    def _parse_port_range(entry):
        """Turn '80' or '8000-8010' into an inclusive (low, high) pair.

        Raises:
            ValueError: if a bound is not an integer.
        """
        if '-' in entry:
            bounds = entry.split('-')
            return int(bounds[0]), int(bounds[1])
        port = int(entry)
        return port, port

    def accept(self, relay):
        """Return True if *relay* is a fast exit for the relevant ports."""
        # Missing bandwidth values default to -1 and fail the thresholds.
        if relay.get('bandwidth_rate', -1) < self.MIN_BANDWIDTH_RATE:
            return False
        if (relay.get('advertised_bandwidth', -1)
                < self.MIN_ADVERTISED_BANDWIDTH):
            return False

        summary = relay.get('exit_policy_summary', {})
        if 'accept' in summary:
            portlist = summary['accept']
        elif 'reject' in summary:
            portlist = summary['reject']
        else:
            return False

        # Compare each relevant port against the range bounds instead of
        # expanding ranges such as '1-65535' into tens of thousands of ints.
        ranges = [self._parse_port_range(entry) for entry in portlist]
        listed = [any(low <= port <= high for low, high in ranges)
                  for port in self.RELEVANT_PORTS]

        # An accept list must cover every relevant port; a reject list
        # must cover none of them.
        if 'accept' in summary and not all(listed):
            return False
        if 'reject' in summary and any(listed):
            return False
        return True
class RelayStats(object):
    """Filter, group, rank and print the relays stored in details.json.

    The relay data is read lazily from 'details.json' in the current
    directory; the filters and the grouping key are derived once from the
    parsed command-line options.
    """

    # Relay fields that are summed per group, in output-column order.
    _WEIGHT_FIELDS = ('consensus_weight_fraction',
                      'advertised_bandwidth_fraction',
                      'guard_probability',
                      'middle_probability',
                      'exit_probability')
    _WEIGHT_TITLES = ("CW", "adv_bw", "P_guard", "P_middle", "P_exit")

    # Row and header templates use the same column widths (each weight
    # column is 9 characters: "%8.4f" plus the percent sign), so the
    # header lines up with the rows.
    _WEIGHTS_FORMAT = "%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%%"
    _ROW_FORMAT = _WEIGHTS_FORMAT + " %-19s %-40s %-4s %-5s %-2s %-9s %s"
    _ROW_FORMAT_LINKS = _WEIGHTS_FORMAT + " %-19s %-78s %-4s %-5s %-2s %-9s %s"
    _HEADER_FORMAT = "%9s %9s %9s %9s %9s %-19s %-40s %-4s %-5s %-2s %-9s %s"
    _HEADER_FORMAT_LINKS = "%9s %9s %9s %9s %9s %-19s %-78s %-4s %-5s %-2s %-9s %s"

    def __init__(self, options):
        """Build the filters and the grouping function from *options*.

        Args:
            options: parsed option values; the attributes read are
                inactive, family, country, ases, exits_only, guards_only,
                fast_exits_only, by_country and by_as.
        """
        # _data must exist before _create_filters runs: the family filter
        # reads self.data while it is being constructed.
        self._data = None
        self._filters = self._create_filters(options)
        self._get_group = self._get_group_function(options)
        self._relays = None

    @property
    def data(self):
        """The parsed content of details.json, loaded on first access."""
        if self._data is None:
            # The context manager closes the file handle after parsing.
            with open('details.json') as details_file:
                self._data = json.load(details_file)
        return self._data

    @property
    def relays(self):
        """Dict mapping each group key to the list of accepted relays.

        Computed on first access.  An empty result is cached as well, so
        a selection that matches nothing is not re-filtered every time.
        """
        if self._relays is None:
            all_relays = self.data['relays']
            self._relays = {}
            for relay in all_relays:
                if all(f.accept(relay) for f in self._filters):
                    self.add_relay(relay)
        return self._relays

    def _create_filters(self, options):
        """Return the list of filters selected by *options*."""
        filters = []
        if not options.inactive:
            filters.append(RunningFilter())
        if options.family:
            filters.append(FamilyFilter(options.family, self.data['relays']))
        if options.country:
            filters.append(CountryFilter(options.country))
        if options.ases:
            filters.append(ASFilter(options.ases))
        if options.exits_only:
            filters.append(ExitFilter())
        if options.guards_only:
            filters.append(GuardFilter())
        if options.fast_exits_only:
            filters.append(FastExitFilter())
        return filters

    def _get_group_function(self, options):
        """Return a function mapping a relay to its group key.

        Relays are keyed by (country, AS), country, AS, or - when no
        grouping is requested - by their own fingerprint.
        """
        if options.by_country and options.by_as:
            return lambda relay: (relay.get('country', None),
                                  relay.get('as_number', None))
        elif options.by_country:
            return lambda relay: relay.get('country', None)
        elif options.by_as:
            return lambda relay: relay.get('as_number', None)
        else:
            return lambda relay: relay.get('fingerprint')

    def add_relay(self, relay):
        """Append *relay* to the group selected by the grouping function."""
        self._relays.setdefault(self._get_group(relay), []).append(relay)

    @staticmethod
    def _sum_weights(weighted):
        """Add up the 5-tuples found in (anything, weights) pairs."""
        total = (0, 0, 0, 0, 0)
        for _, weights in weighted:
            total = tuple(sum(pair) for pair in zip(total, weights))
        return total

    def format_and_sort_groups(self, grouped_relays, by_country=False, by_as_number=False, links=False):
        """Format one output line per group and rank the groups.

        Args:
            grouped_relays: dict mapping a group key to a list of relays.
            by_country: True if the groups are per country.
            by_as_number: True if the groups are per AS.
            links: show an Atlas URL instead of the bare fingerprint.

        Returns:
            A list of (formatted_line, weights) pairs sorted by weights in
            descending order, where weights is the 5-tuple of summed
            consensus weight, advertised bandwidth, guard, middle and exit
            fractions.  Every non-empty group yields exactly one pair.
        """
        row_format = self._ROW_FORMAT_LINKS if links else self._ROW_FORMAT
        formatted_groups = []
        for group in grouped_relays.values():
            if not group:
                # Nothing to describe; an empty group has no last relay.
                continue
            group_weights = tuple(
                sum(relay.get(field, 0) for relay in group)
                for field in self._WEIGHT_FIELDS)

            # The descriptive columns are taken from the group's last relay.
            relay = group[-1]
            flags = relay.get('flags', [])
            nickname = relay['nickname']
            if links:
                fingerprint = ("https://atlas.torproject.org/#details/%s"
                               % relay['fingerprint'])
            else:
                fingerprint = relay['fingerprint']
            exit_flag = 'Exit' if 'Exit' in flags else ''
            guard_flag = 'Guard' if 'Guard' in flags else ''
            country = relay.get('country', '')
            as_number = relay.get('as_number', '')
            as_name = relay.get('as_name', '')

            # Columns that differ between the members of a group are
            # replaced by a placeholder.
            if by_country or by_as_number:
                nickname = "(%d relays)" % len(group)
                fingerprint = "*"
                exit_flag = "*"
                guard_flag = "*"
            if by_country and not by_as_number:
                as_number = "*"
                as_name = "*"
            if by_as_number and not by_country:
                country = "*"

            percentages = tuple(weight * 100.0 for weight in group_weights)
            formatted_group = row_format % (
                percentages + (nickname, fingerprint, exit_flag, guard_flag,
                               country, as_number, as_name))
            formatted_groups.append((formatted_group, group_weights))

        formatted_groups.sort(key=operator.itemgetter(1), reverse=True)
        return formatted_groups

    def print_groups(self, sorted_groups, count=10, by_country=False, by_as_number=False, short=False, links=False):
        """Print the header, the top groups and the summary lines.

        Args:
            sorted_groups: list of (formatted_line, weights) pairs as
                returned by format_and_sort_groups().
            count: number of groups to print; a negative value prints all.
            by_country: True if the groups are per country.
            by_as_number: True if the groups are per AS.
            short: maximum width of the header and group lines; None or
                False (the default) leaves lines uncut.
            links: use the wider "Link" header column.
        """
        # False would otherwise act as the slice bound 0 and blank out
        # every line.
        limit = None if short is None or short is False else short

        if links:
            header = self._HEADER_FORMAT_LINKS % (
                self._WEIGHT_TITLES
                + ("Nickname", "Link", "Exit", "Guard", "CC", "AS_num", "AS_name"))
        else:
            header = self._HEADER_FORMAT % (
                self._WEIGHT_TITLES
                + ("Nickname", "Fingerprint", "Exit", "Guard", "CC", "AS_num", "AS_name"))
        print(header[:limit])

        if count < 0:
            count = len(sorted_groups)
        for formatted_group, _ in sorted_groups[:count]:
            print(formatted_group[:limit])

        if len(sorted_groups) > count:
            if by_country and by_as_number:
                kind = "countries and ASes"
            elif by_country:
                kind = "countries"
            elif by_as_number:
                kind = "ASes"
            else:
                kind = "relays"
            other_weights = self._sum_weights(sorted_groups[count:])
            print((self._WEIGHTS_FORMAT + " (%d other %s)") % (
                other_weights[0] * 100.0, other_weights[1] * 100.0,
                other_weights[2] * 100.0, other_weights[3] * 100.0,
                other_weights[4] * 100.0, len(sorted_groups) - count, kind))

        # The total line is only printed for more than one group whose
        # summed consensus weight fraction stays below 0.999.
        selection_weights = self._sum_weights(sorted_groups)
        if len(sorted_groups) > 1 and selection_weights[0] < 0.999:
            print((self._WEIGHTS_FORMAT + " (total in selection)") % (
                selection_weights[0] * 100.0, selection_weights[1] * 100.0,
                selection_weights[2] * 100.0, selection_weights[3] * 100.0,
                selection_weights[4] * 100.0))
def create_option_parser():
    """Build the command-line parser for the relay statistics script.

    Returns:
        An OptionParser with a --download option plus three option
        groups: filtering (which relays are selected), grouping (by AS
        and/or country) and display (links, number of results, line
        width).
    """
    parser = OptionParser()
    parser.add_option("-d", "--download", action="store_true",
                      help="download details.json from Onionoo service")

    group = OptionGroup(parser, "Filtering options")
    group.add_option("-i", "--inactive", action="store_true", default=False,
                     help="include relays in selection that aren't currently running")
    # "as" is a reserved word, so the values are stored under "ases".
    group.add_option("-a", "--as", dest="ases", action="append",
                     help="select only relays from autonomous system number AS",
                     metavar="AS")
    group.add_option("-c", "--country", action="append",
                     help="select only relays from country with code CC", metavar="CC")
    group.add_option("-e", "--exits-only", action="store_true",
                     help="select only relays suitable for exit position")
    group.add_option("-f", "--family", action="store", type="string", metavar="RELAY",
                     help="select family by fingerprint or nickname (for named relays)")
    group.add_option("-g", "--guards-only", action="store_true",
                     help="select only relays suitable for guard position")
    group.add_option("-x", "--fast-exits-only", action="store_true",
                     help="select only 100+ Mbit/s exits allowing ports 80, 443, 554, and 1755")
    parser.add_option_group(group)

    group = OptionGroup(parser, "Grouping options")
    group.add_option("-A", "--by-as", action="store_true", default=False,
                     help="group relays by AS")
    group.add_option("-C", "--by-country", action="store_true", default=False,
                     help="group relays by country")
    parser.add_option_group(group)

    group = OptionGroup(parser, "Display options")
    group.add_option("-l", "--links", action="store_true",
                     help="display links to the Atlas service instead of fingerprints")
    group.add_option("-t", "--top", type="int", default=10, metavar="NUM",
                     help="display only the top results (default: %default; -1 for all)")
    group.add_option("-s", "--short", action="store_true",
                     help="cut the length of the line output at 70 chars")
    parser.add_option_group(group)
    return parser
def download_details_file():
    """Download the relay details from Onionoo into details.json.

    The response is read completely before details.json is opened, so a
    failed download leaves an existing file untouched, and both the
    connection and the file are closed even when an error occurs.
    """
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        urlopen = urllib.urlopen  # Python 2

    source = 'https://onionoo.torproject.org/details?type=relay'
    with contextlib.closing(urlopen(source)) as response:
        details = response.read()

    # Binary mode stores the payload exactly as it was received.
    with open("details.json", 'wb') as details_file:
        details_file.write(details)
if '__main__' == __name__:
    # Script entry point: parse and validate the options, optionally
    # download the data, then print the ranked selection.
    parser = create_option_parser()
    (options, args) = parser.parse_args()
    if args:
        parser.error("Did not understand positional argument(s), use options instead.")

    # --family takes either a 40-character upper-case hex fingerprint or
    # an alphanumeric nickname of 1 to 19 characters.
    if (options.family
            and not re.match(r'^[A-F0-9]{40}$', options.family)
            and not re.match(r'^[A-Za-z0-9]{1,19}$', options.family)):
        parser.error("Not a valid fingerprint or nickname: %s" % options.family)

    if options.download:
        download_details_file()
        print("Downloaded details.json. Re-run without --download option.")
        # sys.exit() does not depend on the exit() helper that the site
        # module only injects for interactive use.
        sys.exit()

    if not os.path.exists('details.json'):
        parser.error("Did not find details.json. Re-run with --download.")

    stats = RelayStats(options)
    sorted_groups = stats.format_and_sort_groups(stats.relays,
                                                 by_country=options.by_country,
                                                 by_as_number=options.by_as,
                                                 links=options.links)
    stats.print_groups(sorted_groups, options.top,
                       by_country=options.by_country,
                       by_as_number=options.by_as,
                       short=70 if options.short else None,
                       links=options.links)