3 # This program is free software. It comes without any warranty, to
4 # the extent permitted by applicable law. You can redistribute it
5 # and/or modify it under the terms of the Do What The Fuck You Want
6 # To Public License, Version 2, as published by Sam Hocevar. See
7 # http://sam.zoy.org/wtfpl/COPYING for more details.
# Thresholds for the "fast exit" relay class. The option help text divides
# the two bandwidth values by 125 * 1024 and by 1024 to display them as
# Mbit/s and KB/s respectively.
FAST_EXIT_BANDWIDTH_RATE = 95 * 125 * 1024     # 95 Mbit/s
FAST_EXIT_ADVERTISED_BANDWIDTH = 5000 * 1024   # 5000 kB/s
# Default ports that FastExitFilter compares with a relay's exit policy summary.
FAST_EXIT_PORTS = [80, 443, 554, 1755]
# Default limit on relays per network used by SameNetworkFilter.
FAST_EXIT_MAX_PER_NETWORK = 2

# Lower thresholds for the "almost fast exit" relay class.
ALMOST_FAST_EXIT_BANDWIDTH_RATE = 80 * 125 * 1024    # 80 Mbit/s
ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH = 2000 * 1024  # 2000 kB/s
ALMOST_FAST_EXIT_PORTS = [80, 443]
23 from optparse
import OptionParser
, OptionGroup
class BaseFilter(object):
    """Base class for relay filters.

    Subclasses either override accept() to judge one relay at a time, or
    override load() to filter the whole relay sequence at once.
    """

    def accept(self, relay):
        """Return a true value if *relay* passes this filter.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError("This isn't implemented by the subclass")

    def load(self, relays):
        """Return a list of the items in *relays* that accept() approves.

        A real list is built instead of calling filter(), so the result can
        be iterated or membership-tested more than once even on
        interpreters where filter() returns a one-shot iterator. Input
        order is preserved.
        """
        return [relay for relay in relays if self.accept(relay)]
class RunningFilter(BaseFilter):
    """Filter that keeps only relays whose 'running' entry is true."""

    def accept(self, relay):
        """Return the relay's 'running' value.

        Raises:
            KeyError: if the relay has no 'running' entry.
        """
        is_running = relay['running']
        return is_running
39 class FamilyFilter(BaseFilter
):
40 def __init__(self
, family
, all_relays
):
41 self
._family
_fingerprint
= None
42 self
._family
_nickname
= None
43 self
._family
_relays
= []
45 for relay
in all_relays
:
46 if len(family
) == 40 and relay
['fingerprint'] == family
:
49 if len(family
) < 20 and 'Named' in relay
['flags'] and relay
['nickname'] == family
:
53 self
._family
_fingerprint
= '$%s' % found_relay
['fingerprint']
54 if 'Named' in found_relay
['flags']:
55 self
._family
_nickname
= found_relay
['nickname']
56 self
._family
_relays
= [self
._family
_fingerprint
] + found_relay
.get('effective_family', [])
58 def accept(self
, relay
):
59 fingerprint
= '$%s' % relay
['fingerprint']
60 mentions
= [fingerprint
] + relay
.get('effective_family', [])
61 # Only show families as accepted by consensus (mutually listed relays)
62 listed
= fingerprint
in self
._family
_relays
63 listed
= listed
or 'Named' in relay
['flags'] and relay
['nickname'] in self
._family
_relays
64 mentioned
= self
._family
_fingerprint
in mentions
65 mentioned
= mentioned
or self
._family
_nickname
in mentions
66 if listed
and mentioned
:
class CountryFilter(BaseFilter):
    """Filter that keeps relays whose country is one of the given codes."""

    def __init__(self, countries=None):
        """Store the wanted country codes, lower-cased.

        Args:
            countries: iterable of country-code strings. None (the default)
                means no countries, in which case accept() rejects every
                relay.
        """
        # None replaces the former shared mutable [] default.
        self._countries = [code.lower() for code in countries or ()]

    def accept(self, relay):
        """Return True if the relay's 'country' value is a wanted code.

        A relay without a 'country' entry is looked up as None and is
        therefore never accepted.
        """
        return relay.get('country', None) in self._countries
class ASFilter(BaseFilter):
    """Filter that keeps relays belonging to one of the given AS numbers."""

    def __init__(self, as_sets=None):
        """Store the wanted autonomous systems in "AS<number>" form.

        Args:
            as_sets: iterable of strings. Entries made only of digits get
                an "AS" prefix; all other entries are stored unchanged.
                None (the default) means no autonomous systems.
        """
        # None replaces the former shared mutable [] default.
        self._as_sets = [
            "AS" + entry if entry.isdigit() else entry
            for entry in as_sets or ()
        ]

    def accept(self, relay):
        """Return True if the relay's 'as_number' value is a wanted entry.

        A relay without an 'as_number' entry is looked up as None.
        """
        return relay.get('as_number', None) in self._as_sets
class ExitFilter(BaseFilter):
    """Filter that keeps relays with a positive exit probability."""

    def accept(self, relay):
        """Return True if 'exit_probability' is greater than zero.

        A relay without that entry is treated as having probability -1.
        """
        probability = relay.get('exit_probability', -1)
        return probability > 0.0
class GuardFilter(BaseFilter):
    """Filter that keeps relays with a positive guard probability."""

    def accept(self, relay):
        """Return True if 'guard_probability' is greater than zero.

        A relay without that entry is treated as having probability -1.
        """
        probability = relay.get('guard_probability', -1)
        return probability > 0.0
92 class FastExitFilter(BaseFilter
):
94 def __init__(self
, relay
):
95 self
.exit
= relay
.get('exit_probability')
96 self
.fp
= relay
.get('fingerprint')
99 def __init__(self
, bandwidth_rate
=FAST_EXIT_BANDWIDTH_RATE
,
100 advertised_bandwidth
=FAST_EXIT_ADVERTISED_BANDWIDTH
,
101 ports
=FAST_EXIT_PORTS
):
102 self
.bandwidth_rate
= bandwidth_rate
103 self
.advertised_bandwidth
= advertised_bandwidth
106 def load(self
, all_relays
):
107 # First, filter relays based on bandwidth and port requirements.
109 for relay
in all_relays
:
110 if relay
.get('bandwidth_rate', -1) < self
.bandwidth_rate
:
112 if relay
.get('advertised_bandwidth', -1) < self
.advertised_bandwidth
:
114 relevant_ports
= set(self
.ports
)
115 summary
= relay
.get('exit_policy_summary', {})
116 if 'accept' in summary
:
117 portlist
= summary
['accept']
118 elif 'reject' in summary
:
119 portlist
= summary
['reject']
125 ports
.extend(range(int(p
.split('-')[0]),
126 int(p
.split('-')[1]) + 1))
129 policy_ports
= set(ports
)
130 if 'accept' in summary
and not relevant_ports
.issubset(policy_ports
):
132 if 'reject' in summary
and not relevant_ports
.isdisjoint(policy_ports
):
134 matching_relays
.append(relay
)
135 return matching_relays
137 class SameNetworkFilter(BaseFilter
):
138 def __init__(self
, orig_filter
, max_per_network
=FAST_EXIT_MAX_PER_NETWORK
):
139 self
.orig_filter
= orig_filter
140 self
.max_per_network
= max_per_network
142 def load(self
, all_relays
):
144 for relay
in self
.orig_filter
.load(all_relays
):
145 or_addresses
= relay
.get("or_addresses")
147 for ip
in or_addresses
:
148 ip
, port
= ip
.rsplit(':', 1)
153 if no_of_addresses
> 1:
154 print "[WARNING] - %s has more than one IPv4 OR address - %s" % relay
.get("fingerprint"), or_addresses
155 network
= ip
.rsplit('.', 1)[0]
156 if network_data
.has_key(network
):
157 if len(network_data
[network
]) >= FAST_EXIT_MAX_PER_NETWORK
:
158 # assume current relay to have smallest exit_probability
159 min_exit
= relay
.get('exit_probability')
161 for id, value
in enumerate(network_data
[network
]):
162 if value
.get('exit_probability') < min_exit
:
163 min_exit
= value
.get('exit_probability')
166 del network_data
[network
][min_id
]
167 network_data
[network
].append(relay
)
169 network_data
[network
].append(relay
)
171 network_data
[network
] = [relay
]
172 return list(itertools
.chain
.from_iterable(network_data
.values()))
class InverseFilter(BaseFilter):
    """Filter that keeps exactly the relays another filter leaves out."""

    def __init__(self, orig_filter):
        """Wrap *orig_filter*, whose load() result is to be inverted."""
        self.orig_filter = orig_filter

    def load(self, all_relays):
        """Return the relays in *all_relays* that orig_filter.load() omits.

        The order of *all_relays* is preserved. *all_relays* is traversed
        twice (once by the wrapped filter, once here), so it must be a
        re-iterable sequence.
        """
        # Materialise once: the wrapped filter may return a one-shot
        # iterator, and every membership test below needs the full result.
        matching_relays = list(self.orig_filter.load(all_relays))
        return [relay for relay in all_relays if relay not in matching_relays]
def get_network_family(relay):
    """Return the /16 IPv4 network of a relay's first OR address.

    Args:
        relay: mapping with an optional 'or_addresses' list of
            "address:port" strings.

    Returns:
        A string such as "1.2.0.0/16", or None when the relay lists no
        OR addresses.
    """
    addresses = relay.get('or_addresses', [])
    if not addresses:
        # Nothing to derive a network from.
        return None
    # Guaranteed by Onionoo. Currently restricted to IPv4 by the network design.
    primary_ip, _ = addresses[0].split(':')
    # Network family is /16, so let's take the first two bytes by regex
    return "%s.0.0/16" % re.match(r'^([0-9]+\.[0-9]+)\.', primary_ip).group(1)
195 class RelayStats(object):
196 def __init__(self
, options
, custom_datafile
="details.json"):
198 self
._datafile
_name
= custom_datafile
199 self
._filters
= self
._create
_filters
(options
)
200 self
._get
_group
= self
._get
_group
_function
(options
)
206 self
._data
= json
.load(file(os
.path
.join(os
.path
.dirname(os
.path
.abspath(__file__
)), self
._datafile
_name
)))
214 relays
= self
.data
['relays']
215 for f
in self
._filters
:
216 relays
= f
.load(relays
)
218 self
.add_relay(relay
)
221 def _create_filters(self
, options
):
223 if not options
.inactive
:
224 filters
.append(RunningFilter())
226 filters
.append(FamilyFilter(options
.family
, self
.data
['relays']))
228 filters
.append(CountryFilter(options
.country
))
230 filters
.append(ASFilter(options
.ases
))
231 if options
.exits_only
:
232 filters
.append(ExitFilter())
233 if options
.guards_only
:
234 filters
.append(GuardFilter())
235 if options
.exit_filter
== 'all_relays':
237 elif options
.exit_filter
== 'fast_exits_only':
238 filters
.append(SameNetworkFilter(FastExitFilter()))
239 elif options
.exit_filter
== 'almost_fast_exits_only':
240 filters
.append(FastExitFilter(ALMOST_FAST_EXIT_BANDWIDTH_RATE
,
241 ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH
,
242 ALMOST_FAST_EXIT_PORTS
))
243 filters
.append(InverseFilter(SameNetworkFilter(FastExitFilter())))
244 elif options
.exit_filter
== 'fast_exits_only_any_network':
245 filters
.append(FastExitFilter())
248 def _get_group_function(self
, options
):
250 if options
.by_country
:
251 funcs
.append(lambda relay
: relay
.get('country', None))
253 funcs
.append(lambda relay
: relay
.get('as_number', None))
254 if options
.by_network_family
:
255 funcs
.append(get_network_family
)
256 # Default on grouping by fingerprint
258 funcs
.append(lambda relay
: relay
.get('fingerprint'))
259 return lambda relay
: tuple([func(relay
) for func
in funcs
])
def add_relay(self, relay):
    """File *relay* under its group key in self._relays.

    The key is whatever self._get_group(relay) returns; an empty list is
    created the first time a key is seen, then the relay is appended.
    """
    group_key = self._get_group(relay)
    self._relays.setdefault(group_key, []).append(relay)
# Names of the per-relay weight fields that select_relays sums for each group.
WEIGHTS = ['consensus_weight_fraction', 'advertised_bandwidth_fraction', 'guard_probability', 'middle_probability', 'exit_probability']
269 def print_selection(self
,selection
,options
):
271 Print the selection returned by sort_and_reduce relays into a
272 string for the command line version.
274 column_widths
= [9,10,10,10,10,21,80 if options
.links
else 42,7,7,4,16,11]
275 headings
= ["CW","adv_bw","P_guard","P_middle", "P_exit", "Nickname",
276 "Link" if options
.links
else "Fingerprint",
277 "Exit","Guard","CC", "IPv4", "Autonomous System"]
280 header
= "".join(word
.ljust(column_widths
[i
]) for i
,word
in enumerate(headings
))
281 print(header
[:options
.short
])
283 for relay
in selection
['results']:
284 line
= "".join(field
.ljust(column_widths
[i
])
286 enumerate(relay
.printable_fields(options
.links
)))
287 print(line
[:options
.short
])
289 #Print the 'excluded' set if we have it
290 if selection
['excluded']:
291 line
= "".join(field
.ljust(column_widths
[i
])
293 enumerate(selection
['excluded'].printable_fields()))
294 print(line
[:options
.short
])
296 #Print the 'total' set if we have it
297 if selection
['total']:
298 line
= "".join(field
.ljust(column_widths
[i
])
300 enumerate(selection
['total'].printable_fields()))
301 print(line
[:options
.short
])
303 def sort_and_reduce(self
, relay_set
, options
):
305 Take a set of relays (has already been grouped and
306 filtered), sort it and return the ones requested
307 in the 'top' option. Add index numbers to them as well.
309 Returns a hash with three values:
310 *results*: A list of Result objects representing the selected
312 *excluded*: A Result object representing the stats for the
313 filtered out relays. May be None
314 *total*: A Result object representing the stats for all of the
315 relays in this filterset.
317 output_relays
= list()
318 excluded_relays
= None
321 # We need a simple sorting key function
323 return getattr(r
,options
.sort
)
325 relay_set
.sort(key
=sort_fn
,reverse
=options
.sort_reverse
)
328 options
.top
= len(relay_set
)
330 # Set up to handle the special lines at the bottom
331 excluded_relays
= util
.Result(zero_probs
=True)
332 total_relays
= util
.Result(zero_probs
=True)
333 if options
.by_country
or options
.by_as
or options
.by_network_family
:
334 filtered
= "relay groups"
338 # Add selected relays to the result set
339 for i
,relay
in enumerate(relay_set
):
340 # We have no links if we're grouping
341 if options
.by_country
or options
.by_as
or options
.by_network_family
:
346 output_relays
.append(relay
)
349 excluded_relays
.p_guard
+= relay
.p_guard
350 excluded_relays
.p_exit
+= relay
.p_exit
351 excluded_relays
.p_middle
+= relay
.p_middle
352 excluded_relays
.adv_bw
+= relay
.adv_bw
353 excluded_relays
.cw
+= relay
.cw
355 total_relays
.p_guard
+= relay
.p_guard
356 total_relays
.p_exit
+= relay
.p_exit
357 total_relays
.p_middle
+= relay
.p_middle
358 total_relays
.adv_bw
+= relay
.adv_bw
359 total_relays
.cw
+= relay
.cw
361 excluded_relays
.nick
= "(%d other %s)" % (
362 len(relay_set
) - options
.top
,
364 total_relays
.nick
= "(total in selection)"
366 # Only include the excluded line if
367 if len(relay_set
) <= options
.top
:
368 excluded_relays
= None
370 # Only include the last line if
371 if total_relays
.cw
> 99.9:
375 'results': output_relays
,
376 'excluded': excluded_relays
,
377 'total': total_relays
381 def select_relays(self
, grouped_relays
, options
):
383 Return a Pythonic representation of the relays result set. Return it as a set of Result objects.
386 for group
in grouped_relays
.itervalues():
387 #Initialize some stuff
388 group_weights
= dict.fromkeys(RelayStats
.WEIGHTS
, 0)
389 relays_in_group
, exits_in_group
, guards_in_group
= 0, 0, 0
390 ases_in_group
= set()
391 countries_in_group
= set()
392 network_families_in_group
= set()
393 result
= util
.Result()
395 for weight
in RelayStats
.WEIGHTS
:
396 group_weights
[weight
] += relay
.get(weight
, 0)
398 result
.nick
= relay
['nickname']
399 result
.fp
= relay
['fingerprint']
400 result
.link
= options
.links
402 if 'Exit' in set(relay
['flags']) and not 'BadExit' in set(relay
['flags']):
407 if 'Guard' in set(relay
['flags']):
408 result
.guard
= 'Guard'
412 result
.cc
= relay
.get('country', '??').upper()
413 countries_in_group
.add(result
.cc
)
414 result
.primary_ip
= relay
.get('or_addresses', ['??:0'])[0].split(':')[0]
415 network_families_in_group
.add(get_network_family(relay
))
416 result
.as_no
= relay
.get('as_number', '??')
417 result
.as_name
= relay
.get('as_name', '??')
418 result
.as_info
= "%s %s" %(result
.as_no
, result
.as_name
)
419 ases_in_group
.add(result
.as_info
)
422 # If we want to group by things, we need to handle some fields
424 if options
.by_country
or options
.by_as
or options
.by_network_family
:
426 result
.fp
= "(%d relays)" % relays_in_group
427 result
.exit
= "(%d)" % exits_in_group
428 result
.guard
= "(%d)" % guards_in_group
429 if not options
.by_as
and not options
.ases
:
430 result
.as_info
= "(%d)" % len(ases_in_group
)
431 if not options
.by_country
and not options
.country
:
432 result
.cc
= "(%d)" % len(countries_in_group
)
433 if not options
.by_network_family
:
434 result
.primary_ip
= "(%d diff. /16)" % len(network_families_in_group
)
436 result
.primary_ip
= network_families_in_group
.pop()
438 #Include our weight values
439 for weight
in group_weights
.iterkeys():
440 result
['cw'] = group_weights
['consensus_weight_fraction'] * 100.0
441 result
['adv_bw'] = group_weights
['advertised_bandwidth_fraction'] * 100.0
442 result
['p_guard'] = group_weights
['guard_probability'] * 100.0
443 result
['p_middle'] = group_weights
['middle_probability'] * 100.0
444 result
['p_exit'] = group_weights
['exit_probability'] * 100.0
446 results
.append(result
)
450 def create_option_parser():
451 parser
= OptionParser()
452 parser
.add_option("-d", "--download", action
="store_true",
453 help="download details.json from Onionoo service")
454 group
= OptionGroup(parser
, "Filtering options")
455 group
.add_option("-i", "--inactive", action
="store_true", default
=False,
456 help="include relays in selection that aren't currently running")
457 group
.add_option("-a", "--as", dest
="ases", action
="append",
458 help="select only relays from autonomous system number AS",
460 group
.add_option("-c", "--country", action
="append",
461 help="select only relays from country with code CC", metavar
="CC")
462 group
.add_option("-e", "--exits-only", action
="store_true",
463 help="select only relays suitable for exit position")
464 group
.add_option("-f", "--family", action
="store", type="string", metavar
="RELAY",
465 help="select family by fingerprint or nickname (for named relays)")
466 group
.add_option("-g", "--guards-only", action
="store_true",
467 help="select only relays suitable for guard position")
468 group
.add_option("--exit-filter",type="choice", dest
="exit_filter",
469 choices
=["fast_exits_only","almost_fast_exits_only",
470 "all_relays","fast_exits_only_any_network"],
471 metavar
="{fast_exits_only|almost_fast_exits_only|all_relays|fast_exits_only_any_network}",
472 default
='all_relays')
473 group
.add_option("--fast-exits-only", action
="store_true",
474 help="select only fast exits (%d+ Mbit/s, %d+ KB/s, %s, %d- per /24)" %
475 (FAST_EXIT_BANDWIDTH_RATE
/ (125 * 1024),
476 FAST_EXIT_ADVERTISED_BANDWIDTH
/ 1024,
477 '/'.join(map(str, FAST_EXIT_PORTS
)),
478 FAST_EXIT_MAX_PER_NETWORK
))
479 group
.add_option("--almost-fast-exits-only", action
="store_true",
480 help="select only almost fast exits (%d+ Mbit/s, %d+ KB/s, %s, not in set of fast exits)" %
481 (ALMOST_FAST_EXIT_BANDWIDTH_RATE
/ (125 * 1024),
482 ALMOST_FAST_EXIT_ADVERTISED_BANDWIDTH
/ 1024,
483 '/'.join(map(str, ALMOST_FAST_EXIT_PORTS
))))
484 group
.add_option("--fast-exits-only-any-network", action
="store_true",
485 help="select only fast exits without network restriction (%d+ Mbit/s, %d+ KB/s, %s)" %
486 (FAST_EXIT_BANDWIDTH_RATE
/ (125 * 1024),
487 FAST_EXIT_ADVERTISED_BANDWIDTH
/ 1024,
488 '/'.join(map(str, FAST_EXIT_PORTS
))))
489 parser
.add_option_group(group
)
490 group
= OptionGroup(parser
, "Grouping options")
491 group
.add_option("-A", "--by-as", action
="store_true", default
=False,
492 help="group relays by AS")
493 group
.add_option("-C", "--by-country", action
="store_true", default
=False,
494 help="group relays by country")
495 group
.add_option("-N", "--by-network-family", action
="store_true", default
=False,
496 help="group relays by network family (/16 IPv4)")
497 parser
.add_option_group(group
)
498 group
= OptionGroup(parser
, "Sorting options")
499 group
.add_option("--sort", type="choice",
500 choices
=["cw","adv_bw","p_guard","p_exit","p_middle",
502 metavar
="{cw|adv_bw|p_guard|p_exit|p_middle|nick|fp}",
504 help="sort by this field")
505 group
.add_option("--sort_reverse", action
="store_true", default
=True,
506 help="invert the sorting order")
507 parser
.add_option_group(group
)
508 group
= OptionGroup(parser
, "Display options")
509 group
.add_option("-l", "--links", action
="store_true",
510 help="display links to the Atlas service instead of fingerprints")
511 group
.add_option("-t", "--top", type="int", default
=10, metavar
="NUM",
512 help="display only the top results (default: %default; -1 for all)")
514 group
.add_option("-s", "--short", action
="store_const",dest
='short',const
=70,
515 help="cut the length of the line output at 70 chars")
516 group
.add_option("-j", "--json", action
="store_true",
517 help="output in JSON rather than human-readable format")
518 group
.add_option("--datafile", default
="details.json",
519 help="use a custom datafile (Default: 'details.json')")
520 parser
.add_option_group(group
)
def download_details_file():
    """Fetch relay details from Onionoo into details.json beside this file.

    The response body is written to 'details.json' in the directory that
    contains this script, replacing any existing copy.
    """
    target = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'details.json')
    url = urllib.urlopen('https://onionoo.torproject.org/details?type=relay')
    try:
        # Read everything before opening the target, so a failed download
        # does not leave a truncated details.json behind.
        payload = url.read()
    finally:
        url.close()
    # The context manager closes the file even if the write fails.
    with open(target, 'w') as details_file:
        details_file.write(payload)
530 def fix_exit_filter_options(options
):
532 Translate the old-style exit filter options into
533 the new format (as received on the front end).
535 if options
.exit_filter
!= "all_relays":
536 # We just accept this option's value
539 fast_exit_options
= 0
540 if options
.fast_exits_only
:
541 options
.exit_filter
= "fast_exits_only"
542 fast_exit_options
+= 1
543 if options
.almost_fast_exits_only
:
544 options
.exit_filter
= "almost_fast_exits_only"
545 fast_exit_options
+= 1
546 if options
.fast_exits_only_any_network
:
547 options
.exit_filter
= "fast_exits_only_any_network"
548 fast_exit_options
+= 1
550 if fast_exit_options
> 1:
556 if '__main__' == __name__
:
557 parser
= create_option_parser()
558 (options
, args
) = parser
.parse_args()
560 parser
.error("Did not understand positional argument(s), use options instead.")
561 if options
.family
and not re
.match(r
'^[A-F0-9]{40}$', options
.family
) and not re
.match(r
'^[A-Za-z0-9]{1,19}$', options
.family
):
562 parser
.error("Not a valid fingerprint or nickname: %s" % options
.family
)
565 options
= fix_exit_filter_options(options
)
567 parser
.error("Can only filter by one fast-exit option.")
570 download_details_file()
571 print "Downloaded details.json. Re-run without --download option."
573 if not os
.path
.exists(os
.path
.join(os
.path
.dirname(os
.path
.abspath(__file__
)), 'details.json')):
574 parser
.error("Did not find details.json. Re-run with --download.")
576 stats
= RelayStats(options
,options
.datafile
)
577 results
= stats
.select_relays(stats
.relays
,options
)
579 sorted_results
= stats
.sort_and_reduce(results
,options
)
582 print(json
.dumps(sorted_results
,cls
=util
.ResultEncoder
))
584 stats
.print_selection(sorted_results
,options
)