Add --download option (#6329).
[tor-metrics-tasks/delber.git] / task-6329 / tor-relays-stats.py
blobfc9b14c6ec8969658334c57ce0c0e03b6fda064c
1 #!/usr/bin/env python
3 # This program is free software. It comes without any warranty, to
4 # the extent permitted by applicable law. You can redistribute it
5 # and/or modify it under the terms of the Do What The Fuck You Want
6 # To Public License, Version 2, as published by Sam Hocevar. See
7 # http://sam.zoy.org/wtfpl/COPYING for more details.
9 import json
10 import operator
11 import sys
12 import os.path
13 from optparse import OptionParser, OptionGroup
14 import urllib
class RelayStats(object):
    """Filter, group, rank and print relays read from an Onionoo details file.

    The relay descriptions are loaded lazily from ``details.json`` in the
    current working directory the first time :attr:`data` is accessed.
    """

    # Per-relay keys that make up the five weight columns, in display order.
    _WEIGHT_KEYS = ('consensus_weight_fraction',
                    'advertised_bandwidth_fraction',
                    'guard_probability',
                    'middle_probability',
                    'exit_probability')

    # Column layout shared by the header and every data row so they line up.
    _HEADER_FORMAT = "%9s %9s %9s %9s %9s %-19s %-40s %-4s %-5s %-2s %-9s %s"
    _ROW_FORMAT = "%s %-19s %-40s %-4s %-5s %-2s %-9s %s"

    def __init__(self):
        self._data = None

    @property
    def data(self):
        """Return the parsed contents of ``details.json``, loading it once."""
        # Compare against None (not truthiness) so that an empty document is
        # cached too instead of being re-read on every access.
        if self._data is None:
            # The context manager closes the file even if parsing fails.
            with open('details.json') as details_file:
                self._data = json.load(details_file)
        return self._data

    @staticmethod
    def _add_weights(totals, weights):
        """Return the element-wise sum of two weight tuples."""
        return tuple(sum(pair) for pair in zip(totals, weights))

    @staticmethod
    def _format_weights(weights):
        """Render the five weight fractions as fixed-width percentages."""
        return "%8.4f%% %8.4f%% %8.4f%% %8.4f%% %8.4f%%" % tuple(
            weight * 100.0 for weight in weights[:5])

    def get_relays(self, countries=None, as_sets=None, exits_only=False, guards_only=False):
        """Return the running relays that match all given filters.

        countries   -- country codes to keep (compared lower-cased); empty
                       or None keeps every country
        as_sets     -- AS numbers to keep (exact match); empty or None keeps
                       every AS
        exits_only  -- keep only relays with exit_probability > 0
        guards_only -- keep only relays with guard_probability > 0
        """
        countries = [code.lower() for code in countries] if countries else []
        as_sets = as_sets or []
        relays = []
        for relay in self.data['relays']:
            if not relay['running']:
                continue
            # A relay lacking the field gets ' ', which matches no filter.
            if countries and relay.get('country', ' ') not in countries:
                continue
            if as_sets and relay.get('as_number', ' ') not in as_sets:
                continue
            if exits_only and not relay.get('exit_probability', -1) > 0.0:
                continue
            if guards_only and not relay.get('guard_probability', -1) > 0.0:
                continue
            relays.append(relay)
        return relays

    def group_relays(self, relays, by_country=False, by_as_number=False):
        """Return a dict mapping a group key to the list of relays in it.

        The key is (country, as_number) when both flags are set, the country
        or the AS number when only one is set, and the relay fingerprint
        (i.e. one group per relay) when neither is set.
        """
        grouped_relays = {}
        for relay in relays:
            if by_country and by_as_number:
                key = (relay.get('country', None), relay.get('as_number', None))
            elif by_country:
                key = relay.get('country', None)
            elif by_as_number:
                key = relay.get('as_number', None)
            else:
                key = relay.get('fingerprint')
            grouped_relays.setdefault(key, []).append(relay)
        return grouped_relays

    def format_and_sort_groups(self, grouped_relays, by_country=False, by_as_number=False):
        """Return a list of (formatted_line, weights) sorted by weight, highest first.

        ``weights`` is the 5-tuple of summed weight fractions of the group,
        in the order of the printed columns. Columns that are not meaningful
        for an aggregated group are shown as "*".
        """
        # NOTE: keyed by the formatted line, so two groups that render to the
        # exact same text would collapse into one entry.
        formatted_groups = {}
        for group in grouped_relays.values():
            group_weights = (0, 0, 0, 0, 0)
            relays_in_group = 0
            for relay in group:
                weights = tuple(relay.get(key, 0) for key in self._WEIGHT_KEYS)
                group_weights = self._add_weights(group_weights, weights)
                nickname = relay['nickname']
                fingerprint = relay['fingerprint']
                flags = set(relay['flags'])
                exit_flag = 'Exit' if 'Exit' in flags else ''
                guard_flag = 'Guard' if 'Guard' in flags else ''
                country = relay.get('country', '')
                as_number = relay.get('as_number', '')
                as_name = relay.get('as_name', '')
                relays_in_group += 1
            # The descriptive columns below come from the last relay of the
            # group; per-relay details are masked when rows are aggregates.
            if by_country or by_as_number:
                nickname = "(%d relays)" % relays_in_group
                fingerprint = "*"
                exit_flag = "*"
                guard_flag = "*"
            if by_country and not by_as_number:
                as_number = "*"
                as_name = "*"
            if by_as_number and not by_country:
                country = "*"
            formatted_group = self._ROW_FORMAT % (
                self._format_weights(group_weights),
                nickname, fingerprint,
                exit_flag, guard_flag, country, as_number, as_name)
            formatted_groups[formatted_group] = group_weights
        sorted_groups = sorted(formatted_groups.items(), key=operator.itemgetter(1))
        sorted_groups.reverse()
        return sorted_groups

    def print_groups(self, sorted_groups, count=10, by_country=False, by_as_number=False):
        """Print a header, the top ``count`` groups and summary lines.

        When more than ``count`` groups exist, one extra line sums up the
        remaining ones. A final "total in selection" line is printed when
        there is more than one group and the summed consensus weight
        fraction of the selection is below 0.999.
        """
        print(self._HEADER_FORMAT % (
            "CW", "adv_bw", "P_guard", "P_middle", "P_exit", "Nickname",
            "Fingerprint", "Exit", "Guard", "CC", "AS_num", "AS_name"))

        for formatted_group, _ in sorted_groups[:count]:
            print(formatted_group)
        if len(sorted_groups) > count:
            if by_country and by_as_number:
                group_kind = "countries and ASes"
            elif by_country:
                group_kind = "countries"
            elif by_as_number:
                group_kind = "ASes"
            else:
                group_kind = "relays"
            other_weights = (0, 0, 0, 0, 0)
            for _, weights in sorted_groups[count:]:
                other_weights = self._add_weights(other_weights, weights)
            print("%s (%d other %s)" % (
                self._format_weights(other_weights),
                len(sorted_groups) - count, group_kind))
        selection_weights = (0, 0, 0, 0, 0)
        for _, weights in sorted_groups:
            selection_weights = self._add_weights(selection_weights, weights)
        if len(sorted_groups) > 1 and selection_weights[0] < 0.999:
            print("%s (total in selection)" % self._format_weights(selection_weights))
def download_details_file():
    """Download the running-relay details document from Onionoo.

    The response body is written verbatim to ``details.json`` in the
    current working directory.

    Raises IOError when the service answers with a non-200 HTTP status;
    network errors raised by ``urlopen`` propagate unchanged.
    """
    import contextlib
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # Fetch the complete body *before* touching details.json so that a
    # failed or interrupted download never truncates an existing good file.
    with contextlib.closing(urlopen(
            'https://onionoo.torproject.org/details?type=relay&running=true')) as response:
        # Python 2's urllib.urlopen does not raise on HTTP errors; without
        # this check an error page would be saved as if it were JSON.
        status = response.getcode()
        if status is not None and status != 200:
            raise IOError("Onionoo request failed with HTTP status %s" % status)
        payload = response.read()

    # Binary mode stores the payload byte-for-byte on every platform and
    # accepts the bytes object returned by read() on Python 3.
    with open("details.json", 'wb') as details_file:
        details_file.write(payload)
def _build_option_parser():
    """Create the command-line parser with all supported options."""
    parser = OptionParser()
    parser.add_option("-d", "--download", action="store_true",
                      help="download details.json from Onionoo service")

    # Options restricting which relays are considered at all.
    filtering = OptionGroup(parser, "Filtering options")
    filtering.add_option("-a", "--as", dest="ases", action="append",
                         help="select only relays from autonomous system number AS",
                         metavar="AS")
    filtering.add_option("-c", "--country", action="append",
                         help="select only relays from country with code CC", metavar="CC")
    filtering.add_option("-e", "--exits-only", action="store_true",
                         help="select only relays suitable for exit position")
    filtering.add_option("-g", "--guards-only", action="store_true",
                         help="select only relays suitable for guard position")
    parser.add_option_group(filtering)

    # Options controlling how the selected relays are aggregated.
    grouping = OptionGroup(parser, "Grouping options")
    grouping.add_option("-A", "--by-as", action="store_true", default=False,
                        help="group relays by AS")
    grouping.add_option("-C", "--by-country", action="store_true", default=False,
                        help="group relays by country")
    parser.add_option_group(grouping)

    # Options controlling how much output is printed.
    display = OptionGroup(parser, "Display options")
    display.add_option("-t", "--top", type="int", default=10, metavar="NUM",
                       help="display only the top results (default: %default)")
    parser.add_option_group(display)
    return parser


if __name__ == '__main__':
    parser = _build_option_parser()
    options, args = parser.parse_args()
    if args:
        parser.error("Did not understand positional argument(s), use options instead.")

    if options.download:
        download_details_file()

    # Checked after the optional download so a fresh fetch satisfies it.
    if not os.path.exists('details.json'):
        parser.error("Did not find details.json. Re-run with --download.")

    # The same grouping flags drive grouping, formatting and printing.
    grouping_flags = {'by_country': options.by_country,
                      'by_as_number': options.by_as}

    stats = RelayStats()
    relays = stats.get_relays(countries=options.country,
                              as_sets=options.ases,
                              exits_only=options.exits_only,
                              guards_only=options.guards_only)
    grouped_relays = stats.group_relays(relays, **grouping_flags)
    sorted_groups = stats.format_and_sort_groups(grouped_relays, **grouping_flags)
    stats.print_groups(sorted_groups, options.top, **grouping_flags)