# calculate frac_relays, frac_cw to compare consensus documents over time
#
# let Y be the base document and X be the document some hours before the base document
# frac_relays = count(intersection(Y, X)) / count(Y)
# frac_cw = sum(cw(Y) over intersection(Y, X)) / sum(cw(Y))
import os
from datetime import datetime, timedelta

from stem.descriptor import parse_file
# generate expected consensus filepath from time
def filepath_from_time(cur_datetime):
    """Return the expected relative path of the consensus for *cur_datetime*.

    The path is built from three components derived from the timestamp:

    * ``consensuses-YYYY-MM``             (month directory)
    * ``DD``                              (day-of-month directory)
    * ``YYYY-MM-DD-HH-MM-SS-consensus``   (file name)

    Args:
        cur_datetime: object providing ``strftime`` (a ``datetime``).

    Returns:
        The three components joined with the platform path separator.
        No check is made that the file actually exists.
    """
    # NOTE(review): the call wrapping these three components was lost in the
    # garbled source; os.path.join is reconstructed from the way callers feed
    # the result to os.path.basename() and open() -- confirm against the
    # original script (e.g. that no leading base directory was present).
    return os.path.join(
        'consensuses-%s' % cur_datetime.strftime('%Y-%m'),
        cur_datetime.strftime('%d'),
        '%s-consensus' % cur_datetime.strftime('%Y-%m-%d-%H-%M-%S'),
    )
# router bw storage by fingerprint
# maps consensus file name -> {relay fingerprint: bandwidth}; filled by the
# loading loop and read by the analysis loop
router_data = {}

# step between consecutive consensus documents
time_interval = timedelta(0, 60*60)  # one hour

# interval multipliers for analysis: 1 hour to 7 days
time_interval_list = [1, 2, 3, 4, 5, 6, 12, 24, 36, 48, 72, 96, 120, 144, 168]  # hours

# base consensuses for examination
initial_time_info_bound = datetime(2012, 1, 1)  # inclusive
final_time_info_bound = datetime(2013, 1, 1)  # exclusive

# data range for consensuses
# starts one month before the first base consensus so that every look-back
# interval (at most 168 hours) falls inside the loaded range
initial_time_data_bound = datetime(2011, 12, 1)  # inclusive
final_time_data_bound = datetime(2013, 1, 1)  # exclusive
# load every hourly consensus in the data range into router_data,
# keyed by consensus file name
cur_datetime = initial_time_data_bound
while cur_datetime < final_time_data_bound:
    cur_filepath = filepath_from_time(cur_datetime)
    cur_filename = os.path.basename(cur_filepath)

    # NOTE(review): the try/except lines were lost in the garbled source; they
    # are reconstructed from the surviving "file does not exist" comment --
    # confirm the exception type against the original script.
    try:
        # binary mode, as in stem's own parse_file examples
        with open(cur_filepath, 'rb') as consensus_file:
            # the dict is fully built before being stored, so a failure
            # while parsing never leaves a partial entry behind
            router_data[cur_filename] = dict(
                (r.fingerprint, r.bandwidth)
                for r in parse_file(consensus_file))
    except IOError:
        pass  # file does not exist (possible situation) and iterate

    cur_datetime += time_interval
# iterate over base consensuses for frac_relays, frac_cw
#
# one CSV line is printed per (base consensus, look-back interval) pair for
# which both documents were loaded:
#   base file name, interval in hours, frac_relays, frac_cw,
#   base month, base day of month, base weekday (strftime '%w')
cur_datetime = initial_time_info_bound
while cur_datetime < final_time_info_bound:
    cur_filepath = filepath_from_time(cur_datetime)  # current
    cur_filename = os.path.basename(cur_filepath)  # current

    # find base data, if data exists
    base_routers = router_data.get(cur_filename, {})
    base_router_count = len(base_routers)
    base_router_bw = sum(base_routers.values())

    # both fractions divide by the base totals; skip a base consensus that is
    # missing, empty or has zero total bandwidth instead of dividing by zero
    if base_router_count and base_router_bw:
        # for each analysis interval, find comparison locator
        for time_interval_multiplier in time_interval_list:
            comp_time_interval = time_interval_multiplier * time_interval
            comp_datetime = cur_datetime - comp_time_interval

            comp_filepath = filepath_from_time(comp_datetime)  # comp
            comp_filename = os.path.basename(comp_filepath)  # comp

            # find comparison data, if data exists
            if comp_filename not in router_data:
                continue

            router_overlap_count = 0
            base_router_overlap_bw = 0

            # determine intersection(Y,X) and sum cw over intersection(Y,X);
            # the weight is always taken from the base document Y
            for fingerprint in router_data[comp_filename]:
                if fingerprint in base_routers:
                    router_overlap_count += 1
                    base_router_overlap_bw += base_routers[fingerprint]

            frac_relays = float(router_overlap_count) / float(base_router_count)
            frac_cw = float(base_router_overlap_bw) / float(base_router_bw)

            # single parenthesised argument: valid as a Python 2 print
            # statement and as a Python 3 print() call
            print('%s,%d,%f,%f,%d,%d,%s' % (
                cur_filename, time_interval_multiplier,
                frac_relays, frac_cw, cur_datetime.month, cur_datetime.day,
                cur_datetime.strftime('%w')))

    cur_datetime += time_interval