[tor-metrics-tasks.git] / task-7241 / first_pass.py

# calculate frac_relays, frac_cw to compare consensus documents over time

# let Y be the base document and X be some hours before the base document
# frac_relays = count(intersection(Y, X)) / count(Y)
# frac_cw = sum(cw(Y) over intersection(Y,X)) / sum(cw(Y))
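# e.g. (illustrative numbers only): if Y lists 3000 relays and 2700 of them
# also appear in X, then frac_relays = 2700/3000 = 0.9; frac_cw weights that
# same intersection by the bandwidth values recorded in Y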

import os
from datetime import datetime, timedelta
from stem.descriptor import parse_file

# generate expected consensus filepath from time
def filepath_from_time(cur_datetime):
    return os.path.join(
        'consensuses-%s' % cur_datetime.strftime('%Y-%m'),
        cur_datetime.strftime('%d'),
        '%s-consensus' % cur_datetime.strftime('%Y-%m-%d-%H-%M-%S'))
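# e.g. filepath_from_time(datetime(2012, 1, 1)) yields
# 'consensuses-2012-01/01/2012-01-01-00-00-00-consensus' (POSIX separators)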

# router bw storage by fingerprint
router_data = {}

# unit time interval
time_interval = timedelta(0, 60*60) # one hour

# interval multipliers for analysis: 1 hour to 7 days
time_interval_list = [1,2,3,4,5,6,12,24,36,48,72,96,120,144,168] # hours

# base consensuses for examination
initial_time_info_bound = datetime(2012, 1, 1) # inclusive
final_time_info_bound = datetime(2013, 1, 1) # exclusive

# data range for consensuses
initial_time_data_bound = datetime(2011, 12, 1) # inclusive
final_time_data_bound = datetime(2013, 1, 1) # exclusive
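# the data range begins a month before the base range, which covers the
# longest lookback (168 hours = 7 days) from the earliest base consensus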

# load information
cur_datetime = initial_time_data_bound
while cur_datetime < final_time_data_bound:
    cur_filepath = filepath_from_time(cur_datetime)
    cur_filename = os.path.basename(cur_filepath)

    try:
        with open(cur_filepath) as consensus_file:
            router_data[cur_filename] = dict([(r.fingerprint, r.bandwidth)
                for r in parse_file(consensus_file)])
    except IOError:
        pass # consensus file does not exist for this hour; skip it and keep iterating

    # next file to read
    cur_datetime += time_interval
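# router_data now maps each consensus filename to a dict of
# {relay fingerprint: bandwidth weight} for every consensus found on disk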

# iterate over base consensuses for frac_relays, frac_cw
cur_datetime = initial_time_info_bound
while cur_datetime < final_time_info_bound:
    cur_filepath = filepath_from_time(cur_datetime) # current
    cur_filename = os.path.basename(cur_filepath) # current

    # find base data, if it exists
    if cur_filename in router_data:
        base_routers = router_data[cur_filename]
        base_router_count = len(base_routers)
        base_router_bw = sum(base_routers.values())
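        # base_router_count and base_router_bw are count(Y) and sum(cw(Y)),
        # the denominators of frac_relays and frac_cw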

        # for each analysis interval, locate the comparison consensus
        for time_interval_multiplier in time_interval_list:
            comp_time_interval = time_interval_multiplier*time_interval
            comp_datetime = cur_datetime - comp_time_interval

            comp_filepath = filepath_from_time(comp_datetime) # comparison
            comp_filename = os.path.basename(comp_filepath) # comparison

            # find comparison data, if it exists
            if comp_filename in router_data:
                router_overlap_count = 0
                base_router_overlap_bw = 0

                # determine intersection(Y,X) and sum cw over intersection(Y,X)
                for fingerprint in router_data[comp_filename]:
                    if fingerprint in base_routers:
                        router_overlap_count += 1
                        base_router_overlap_bw += base_routers[fingerprint]

                # determine ratios
                frac_relays = float(router_overlap_count)/float(base_router_count)
                frac_cw = float(base_router_overlap_bw)/float(base_router_bw)

                # output
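                # CSV columns: consensus filename, lookback in hours, frac_relays,
                # frac_cw, month, day of month, weekday ('%w': 0 = Sunday)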
                print '%s,%d,%f,%f,%d,%d,%s' % (cur_filename, time_interval_multiplier,
                    frac_relays, frac_cw, cur_datetime.month, cur_datetime.day,
                    cur_datetime.strftime('%w'))

    # next base consensus
    cur_datetime += time_interval