Define --top -1 as "all".
[tor-metrics-tasks/delber.git] / task-6329 / tor-relays-stats.py
blob8479dc330252a3ed6e3df60672766706e99e9d66
1 #!/usr/bin/env python
3 # This program is free software. It comes without any warranty, to
4 # the extent permitted by applicable law. You can redistribute it
5 # and/or modify it under the terms of the Do What The Fuck You Want
6 # To Public License, Version 2, as published by Sam Hocevar. See
7 # http://sam.zoy.org/wtfpl/COPYING for more details.
9 import json
10 import operator
11 import sys
12 import os.path
13 from optparse import OptionParser, OptionGroup
14 import urllib
class RelayStats(object):
    """Select, group, and print relay statistics from an Onionoo details file.

    The relay list is read lazily from ``details.json`` in the current
    working directory the first time :attr:`data` is accessed.  Each relay is
    a dict; the keys used here are the ones looked up in the methods below
    (``running``, ``country``, ``as_number``, ``flags``, the
    ``*_probability`` / ``*_fraction`` weights, ...).
    """

    # Ports a relay has to allow to be selected by ``fast_exits_only``.
    _FAST_EXIT_PORTS = (80, 443, 554, 1755)
    # Lower bounds on the 'bandwidth_rate' and 'advertised_bandwidth' fields
    # for ``fast_exits_only`` (presumably bytes/s -- confirm against the
    # Onionoo protocol description).
    _FAST_EXIT_MIN_BANDWIDTH_RATE = 12500 * 1024
    _FAST_EXIT_MIN_ADVERTISED_BANDWIDTH = 5000 * 1024

    # Sum identity for the five tracked weights, in column order:
    # consensus weight, advertised bandwidth, P_guard, P_middle, P_exit.
    _ZERO_WEIGHTS = (0, 0, 0, 0, 0)

    # Output layout.  The header is built from the same field widths as the
    # rows so that the column titles line up with the values below them.
    _WEIGHTS_FORMAT = "%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%%"
    _DETAILS_FORMAT = "%-19s %-40s %-4s %-5s %-2s %-9s %s"
    _HEADER_FORMAT = "%9s %9s %9s %9s %9s " + _DETAILS_FORMAT
    _HEADER_TITLES = ("CW", "adv_bw", "P_guard", "P_middle", "P_exit",
                      "Nickname", "Fingerprint", "Exit", "Guard", "CC",
                      "AS_num", "AS_name")

    def __init__(self):
        self._data = None

    @property
    def data(self):
        """Parsed content of ``details.json``, loaded on first access."""
        if self._data is None:
            # Use a context manager so the file handle is always released.
            with open('details.json') as details_file:
                self._data = json.load(details_file)
        return self._data

    @staticmethod
    def _add_weights(left, right):
        """Return the element-wise sum of two weight tuples."""
        return tuple(a + b for a, b in zip(left, right))

    @classmethod
    def _format_weights(cls, weights):
        """Render five weight fractions as aligned percentage columns."""
        return cls._WEIGHTS_FORMAT % tuple(w * 100.0 for w in weights)

    @staticmethod
    def _policy_covers(portlist, port):
        """Tell whether ``port`` is matched by a list of port strings.

        Entries are either single ports (``"80"``) or inclusive ranges
        (``"8000-8100"``).  Ranges are compared by their bounds instead of
        being expanded into every port they contain.
        """
        for entry in portlist:
            if '-' in entry:
                bounds = entry.split('-')
                if int(bounds[0]) <= port <= int(bounds[1]):
                    return True
            elif int(entry) == port:
                return True
        return False

    @classmethod
    def _is_fast_exit(cls, relay):
        """Apply the ``fast_exits_only`` bandwidth and exit-port criteria."""
        if relay.get('bandwidth_rate', -1) < cls._FAST_EXIT_MIN_BANDWIDTH_RATE:
            return False
        if (relay.get('advertised_bandwidth', -1)
                < cls._FAST_EXIT_MIN_ADVERTISED_BANDWIDTH):
            return False
        summary = relay.get('exit_policy_summary', {})
        if 'accept' not in summary and 'reject' not in summary:
            # Without a policy summary nothing can be said about the ports.
            return False
        # Each rule is checked against its own port list: an accept list has
        # to contain every relevant port, a reject list none of them.
        if 'accept' in summary:
            accepted = summary['accept']
            if not all(cls._policy_covers(accepted, port)
                       for port in cls._FAST_EXIT_PORTS):
                return False
        if 'reject' in summary:
            rejected = summary['reject']
            if any(cls._policy_covers(rejected, port)
                   for port in cls._FAST_EXIT_PORTS):
                return False
        return True

    def get_relays(self, countries=None, as_sets=None, exits_only=False,
                   guards_only=False, inactive=False, fast_exits_only=False):
        """Return the relays from :attr:`data` that pass all given filters.

        countries       -- country codes to keep (compared lower-cased);
                           empty or None keeps every country
        as_sets         -- AS numbers to keep; entries made only of digits
                           get an "AS" prefix; empty or None keeps every AS
        exits_only      -- keep only relays with 'exit_probability' > 0
        guards_only     -- keep only relays with 'guard_probability' > 0
        inactive        -- when false, drop relays whose 'running' is false
        fast_exits_only -- keep only relays meeting the bandwidth minimums
                           and allowing ports 80, 443, 554, and 1755
        """
        wanted_countries = [x.lower() for x in countries] if countries else []
        wanted_ases = (["AS" + x if x.isdigit() else x for x in as_sets]
                       if as_sets else [])
        relays = []
        for relay in self.data['relays']:
            if not inactive and not relay['running']:
                continue
            if wanted_countries and relay.get('country', ' ') not in wanted_countries:
                continue
            if wanted_ases and relay.get('as_number', ' ') not in wanted_ases:
                continue
            if exits_only and not relay.get('exit_probability', -1) > 0.0:
                continue
            if guards_only and not relay.get('guard_probability', -1) > 0.0:
                continue
            if fast_exits_only and not self._is_fast_exit(relay):
                continue
            relays.append(relay)
        return relays

    def group_relays(self, relays, by_country=False, by_as_number=False):
        """Return a dict mapping a group key to the list of relays in it.

        The key is ``(country, as_number)`` when both flags are set, the
        country or the AS number when only one is set, and the relay's
        fingerprint otherwise.  Missing fields yield ``None`` in the key.
        """
        grouped_relays = {}
        for relay in relays:
            if by_country and by_as_number:
                key = (relay.get('country', None), relay.get('as_number', None))
            elif by_country:
                key = relay.get('country', None)
            elif by_as_number:
                key = relay.get('as_number', None)
            else:
                key = relay.get('fingerprint')
            grouped_relays.setdefault(key, []).append(relay)
        return grouped_relays

    def format_and_sort_groups(self, grouped_relays, by_country=False,
                               by_as_number=False):
        """Return ``(formatted_line, weights)`` pairs, heaviest group first.

        ``weights`` is the tuple of the group's summed consensus weight
        fraction, advertised bandwidth fraction, and guard/middle/exit
        probabilities; the pairs are sorted by that tuple in descending
        order.  When grouping by country and/or AS, per-relay columns are
        replaced by "*" and the nickname by the number of relays.
        """
        aggregated = by_country or by_as_number
        formatted_groups = []
        for group in grouped_relays.values():
            if not group:
                # Nothing to describe; avoids formatting undefined columns.
                continue
            group_weights = self._ZERO_WEIGHTS
            for relay in group:
                group_weights = self._add_weights(group_weights, (
                    relay.get('consensus_weight_fraction', 0),
                    relay.get('advertised_bandwidth_fraction', 0),
                    relay.get('guard_probability', 0),
                    relay.get('middle_probability', 0),
                    relay.get('exit_probability', 0)))
            # Descriptive columns come from the last relay of the group.
            last = group[-1]
            country = last.get('country', '')
            as_number = last.get('as_number', '')
            as_name = last.get('as_name', '')
            if aggregated:
                nickname = "(%d relays)" % len(group)
                fingerprint = exit = guard = "*"
                if not by_as_number:
                    as_number = as_name = "*"
                if not by_country:
                    country = "*"
            else:
                flags = last.get('flags', ())
                nickname = last['nickname']
                fingerprint = last['fingerprint']
                exit = 'Exit' if 'Exit' in flags else ''
                guard = 'Guard' if 'Guard' in flags else ''
            formatted_group = "%s %s" % (
                self._format_weights(group_weights),
                self._DETAILS_FORMAT % (nickname, fingerprint, exit, guard,
                                        country, as_number, as_name))
            # A list (not a dict keyed on the text) keeps groups whose
            # formatted lines happen to be identical.
            formatted_groups.append((formatted_group, group_weights))
        return sorted(formatted_groups, key=operator.itemgetter(1),
                      reverse=True)

    def print_groups(self, sorted_groups, count=10, by_country=False,
                     by_as_number=False, short=None):
        """Print a table of the top ``count`` groups plus summary lines.

        sorted_groups -- ``(formatted_line, weights)`` pairs as returned by
                         format_and_sort_groups()
        count         -- number of groups to list; negative lists them all
        by_country, by_as_number -- only used to name the groups that were
                         left out ("countries", "ASes", "relays", ...)
        short         -- if set, header and group lines are cut to that
                         many characters
        """
        print((self._HEADER_FORMAT % self._HEADER_TITLES)[:short])
        if count < 0:
            count = len(sorted_groups)
        shown, hidden = sorted_groups[:count], sorted_groups[count:]
        for formatted_group, _ in shown:
            print(formatted_group[:short])
        if hidden:
            if by_country and by_as_number:
                kind = "countries and ASes"
            elif by_country:
                kind = "countries"
            elif by_as_number:
                kind = "ASes"
            else:
                kind = "relays"
            other_weights = self._ZERO_WEIGHTS
            for _, weights in hidden:
                other_weights = self._add_weights(other_weights, weights)
            print("%s (%d other %s)" % (
                self._format_weights(other_weights), len(hidden), kind))
        selection_weights = self._ZERO_WEIGHTS
        for _, weights in sorted_groups:
            selection_weights = self._add_weights(selection_weights, weights)
        # Only show a total when it adds information: more than one group
        # and the selection is noticeably less than the whole network.
        if len(sorted_groups) > 1 and selection_weights[0] < 0.999:
            print("%s (total in selection)" % (
                self._format_weights(selection_weights),))
def download_details_file():
    """Fetch the relay details document from Onionoo into ``details.json``.

    The document is downloaded completely before ``details.json`` (in the
    current working directory) is opened for writing, so a failed download
    leaves an existing file untouched.  Errors raised by the download or by
    the file write propagate to the caller.
    """
    # Function-scope import so the function works on both Python 3
    # (urllib.request) and Python 2 (urllib) without touching file imports.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen('https://onionoo.torproject.org/details?type=relay')
    try:
        payload = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    # Binary mode stores the downloaded bytes unchanged.
    with open("details.json", 'wb') as details_file:
        details_file.write(payload)
if __name__ == '__main__':
    # Command-line entry point: parse options, optionally refresh
    # details.json, then filter, group, sort, and print the relays.
    cli = OptionParser()
    cli.add_option("-d", "--download", action="store_true",
                   help="download details.json from Onionoo service")

    # Option groups in display order: (title, [(flags, add_option kwargs)]).
    option_groups = [
        ("Filtering options", [
            (("-i", "--inactive"),
             dict(action="store_true", default=False,
                  help="include relays in selection that aren't currently running")),
            (("-a", "--as"),
             dict(dest="ases", action="append",
                  help="select only relays from autonomous system number AS",
                  metavar="AS")),
            (("-c", "--country"),
             dict(action="append",
                  help="select only relays from country with code CC",
                  metavar="CC")),
            (("-e", "--exits-only"),
             dict(action="store_true",
                  help="select only relays suitable for exit position")),
            (("-g", "--guards-only"),
             dict(action="store_true",
                  help="select only relays suitable for guard position")),
            (("-x", "--fast-exits-only"),
             dict(action="store_true",
                  help="select only 100+ MBit/s exits allowing ports 80, 443, 554, and 1755")),
        ]),
        ("Grouping options", [
            (("-A", "--by-as"),
             dict(action="store_true", default=False,
                  help="group relays by AS")),
            (("-C", "--by-country"),
             dict(action="store_true", default=False,
                  help="group relays by country")),
        ]),
        ("Display options", [
            (("-t", "--top"),
             dict(type="int", default=10, metavar="NUM",
                  help="display only the top results (default: %default; -1 for all)")),
            (("-s", "--short"),
             dict(action="store_true",
                  help="cut the length of the line output at 70 chars")),
        ]),
    ]
    for title, specs in option_groups:
        section = OptionGroup(cli, title)
        for flags, settings in specs:
            section.add_option(*flags, **settings)
        cli.add_option_group(section)

    options, leftover = cli.parse_args()
    if leftover:
        cli.error("Did not understand positional argument(s), use options instead.")

    if options.download:
        download_details_file()

    if not os.path.exists('details.json'):
        cli.error("Did not find details.json. Re-run with --download.")

    # The same grouping flags are passed to every stage of the pipeline.
    grouping = dict(by_country=options.by_country, by_as_number=options.by_as)
    line_limit = 70 if options.short else None

    stats = RelayStats()
    selected = stats.get_relays(countries=options.country,
                                as_sets=options.ases,
                                exits_only=options.exits_only,
                                guards_only=options.guards_only,
                                inactive=options.inactive,
                                fast_exits_only=options.fast_exits_only)
    grouped = stats.group_relays(selected, **grouping)
    ranked = stats.format_and_sort_groups(grouped, **grouping)
    stats.print_groups(ranked, options.top, short=line_limit, **grouping)