2 Usage - python pyentropy.py -h
3 Output - A CSV file of the format (without newlines):
5 <entropy for all nodes>,
6 <max entropy for all nodes>,
7 <entropy for exit nodes>,
8 <max entropy for exit nodes>,
9 <entropy for guard nodes>,
10 <max entropy for guard nodes>,
11 <entropy for countries>,
12 <max entropy for countries>,
15 rsync -arz --delete metrics.torproject.org::metrics-recent/relay-descriptors/consensuses in
23 import stem
.descriptor
25 from optparse
import OptionParser
26 from binascii
import b2a_hex
, a2b_base64
, a2b_hex
27 from stem
.descriptor
.server_descriptor
import RelayDescriptor
, BridgeDescriptor
32 self
.advertised_bw
= None
38 def add_router_info(self
, values
):
39 hex_digest
= b2a_hex(a2b_base64(values
[2]+"="))
40 self
.advertised_bw
= self
.get_advertised_bw(hex_digest
)
42 self
.country
= gi_db
.country_code_by_addr(ip
)
43 self
.as_no
= self
.get_as_details(ip
)
def add_weights(self, values):
    """Store the router's bandwidth parsed from "Key=Number" fields.

    values -- sequence of strings of the form "Key=Number", for example
              ["Bandwidth=1500"].

    Sets self.bandwidth to the integer carried by the "Bandwidth" field.
    The field is located by name instead of by position, so a different
    field appearing first cannot be mistaken for the bandwidth.

    Raises ValueError when no "Bandwidth=" field is present or when its
    value is not a valid integer.
    """
    for field in values:
        name, sep, number = field.partition("=")
        # Require the '=' separator so a bare "Bandwidth" token is rejected
        # instead of being converted from an empty string.
        if sep and name == "Bandwidth":
            self.bandwidth = int(number)
            return
    raise ValueError("no Bandwidth= field in weights: %r" % (values,))
48 def add_flags(self
, values
):
49 if "Exit" in values
and not "BadExit" in values
:
54 def get_as_details(self
, ip
):
56 value
= as_db
.org_by_addr(str(ip
)).split()
61 def get_advertised_bw(self
, hex_digest
):
63 with
open(options
.server_desc
+hex_digest
) as f
:
66 desc_iter
= stem
.descriptor
.server_descriptor
.parse_file(StringIO
.StringIO(data
))
67 desc_entries
= list(desc_iter
)
68 desc
= desc_entries
[0]
69 return min(desc
.average_bandwidth
, desc
.burst_bandwidth
, desc
.observed_bandwidth
)
73 def parse_bw_weights(values
):
77 key
, value
= value
.split("=")
78 data
[key
] = float(value
) / 10000
86 Wed
, Wee
, Wgd
, Wgg
= 1, 1, 1, 1
88 with
open(file_name
, 'r') as f
:
89 for line
in f
.readlines():
91 values
= line
.split()[1:]
94 routers
.append(router
)
95 router
.add_router_info(values
)
97 router
.add_flags(values
)
99 router
.add_weights(values
)
100 elif key
== 'valid-after':
101 valid_after
= ' '.join(values
)
102 elif key
== 'bandwidth-weights':
103 data
= parse_bw_weights(values
)
112 if len(routers
) <= 0:
115 total_bw
, total_exit_bw
, total_guard_bw
= 0, 0, 0
116 guards_no
, exits_no
= 0, 0
117 bw_countries
, bw_as
= {}, {}
118 for router
in routers
:
119 if not router
.bandwidth
:
121 total_bw
+= router
.bandwidth
122 if router
.is_guard
and router
.is_exit
:
123 total_guard_bw
+= Wgd
*router
.bandwidth
124 total_exit_bw
+= Wed
*router
.bandwidth
127 elif router
.is_guard
:
128 total_guard_bw
+= Wgg
*router
.bandwidth
131 total_exit_bw
+= Wee
*router
.bandwidth
133 if bw_countries
.has_key(router
.country
):
134 bw_countries
[router
.country
] += router
.bandwidth
136 bw_countries
[router
.country
] = router
.bandwidth
137 if bw_as
.has_key(router
.as_no
):
138 bw_as
[router
.as_no
] += router
.bandwidth
140 bw_as
[router
.as_no
] = router
.bandwidth
145 entropy
, entropy_exit
, entropy_guard
, entropy_country
, entropy_as
= 0.0, 0.0, 0.0, 0.0, 0.0
146 for router
in routers
:
147 p
= float(router
.bandwidth
) / float(total_bw
)
149 entropy
+= -(p
* math
.log(p
, 2))
150 if router
.is_guard
and router
.is_exit
:
151 p
= float(Wgd
*router
.bandwidth
) / float(total_guard_bw
)
153 entropy_guard
+= -(p
* math
.log(p
, 2))
154 p
= float(Wed
*router
.bandwidth
) / float(total_exit_bw
)
156 entropy_exit
+= -(p
* math
.log(p
, 2))
157 elif router
.is_guard
:
158 p
= float(Wgg
*router
.bandwidth
) / float(total_guard_bw
)
160 entropy_guard
+= -(p
* math
.log(p
, 2))
162 p
= float(Wee
*router
.bandwidth
) / float(total_exit_bw
)
164 entropy_exit
+= -(p
* math
.log(p
, 2))
166 for country
in bw_countries
.iterkeys():
167 p
= float(bw_countries
[country
]) / float(total_bw
)
169 entropy_country
+= -(p
* math
.log(p
, 2))
171 for as_no
in bw_as
.iterkeys():
172 p
= float(bw_as
[as_no
]) / float(total_bw
)
174 entropy_as
+= -(p
* math
.log(p
, 2))
176 # Entropy of uniform distribution of 'n' possible values: log(n)
177 max_entropy
= math
.log(len(routers
), 2)
178 max_entropy_guard
= math
.log(guards_no
, 2)
179 max_entropy_exit
= math
.log(exits_no
, 2)
180 max_entropy_country
= math
.log(len(bw_countries
), 2)
181 max_entropy_as
= math
.log(len(bw_as
), 2)
183 return ",".join([valid_after
,
187 str(max_entropy_exit
),
189 str(max_entropy_guard
),
190 str(entropy_country
),
191 str(max_entropy_country
),
193 str(max_entropy_as
)])
196 usage
= "Usage - python pyentropy.py [options]"
197 parser
= OptionParser(usage
)
199 parser
.add_option("-g", "--geoip", dest
="gi_db", default
="GeoIP.dat",
200 help="Input GeoIP database")
201 parser
.add_option("-a", "--as", dest
="as_db", default
="GeoIPASNum.dat",
202 help="Input AS GeoIP database")
203 parser
.add_option("-s", "--server_desc", dest
="server_desc",
204 default
="data/relay-descriptors/server-descriptors/", help="Server descriptors directory")
205 parser
.add_option("-o", "--output", dest
="output", default
="entropy.csv",
206 help="Output filename")
207 parser
.add_option("-c", "--consensus", dest
="consensus", default
="in/consensus",
208 help="Input consensus dir")
210 (options
, args
) = parser
.parse_args()
214 if __name__
== "__main__":
215 options
= parse_args()
216 gi_db
= pygeoip
.GeoIP(options
.gi_db
)
217 as_db
= pygeoip
.GeoIP(options
.as_db
)
219 with
open(options
.output
, 'w') as f
:
220 for file_name
in os
.listdir(options
.consensus
):
221 string
= run(os
.path
.join(options
.consensus
, file_name
))
223 f
.write("%s\n" % (string
))