1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_Bridges -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: see AUTHORS file
6 # :copyright: (c) 2007-2017, The Tor Project, Inc.
7 # :license: 3-Clause BSD, see LICENSE for licensing information
9 """This module has low-level functionality for parsing bridges and arranging
10 them into hashrings for distributors.
23 import bridgedb
.Storage
25 from bridgedb
.bridges
import Bridge
26 from bridgedb
.crypto
import getHMACFunc
27 from bridgedb
.parse
import addr
28 from bridgedb
.parse
.fingerprint
import isValidFingerprint
29 from bridgedb
.parse
.fingerprint
import toHex
30 from bridgedb
.safelog
import logSafely
32 ID_LEN
= 20 # XXX Only used in commented out line in Storage.py
37 class BridgeRingParameters(object):
38 """Store validated settings on minimum number of Bridges with certain
39 attributes which should be included in any generated subring of a
42 :ivar list needPorts: List of two-tuples of desired port numbers and their
44 :ivar list needFlags: List of two-tuples of desired flags_ assigned to a
45 Bridge by the Bridge DirAuth.
47 .. _flags: https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt?id=6b557594ef#n1695
50 def __init__(self
, needPorts
=[], needFlags
=[]):
51 """Control the creation of subrings by including a minimum number of
52 bridges which possess certain attributes.
54 :type needPorts: iterable
55 :param needPorts: An iterable of two-tuples. Each two tuple should
56 contain ``(port, minimum)``, where ``port`` is an integer
57 specifying a port number, and ``minimum`` is another integer
58 specifying the minimum number of Bridges running on that ``port``
59 to include in any new subring.
60 :type needFlags: iterable
61 :param needFlags: An iterable of two-tuples. Each two tuple should
62 contain ``(flag, minimum)``, where ``flag`` is a string specifying
63 an OR flag_, and ``minimum`` is an integer for the minimum number
64 of Bridges which have acquired that ``flag`` to include in any new
66 :raises: An :exc:`TypeError` if an invalid port number, a minimum less
67 than one, or an "unsupported" flag is given. "Stable" appears to
68 be the only currently "supported" flag.
70 for port
, count
in needPorts
:
71 if not (1 <= port
<= 65535):
72 raise TypeError("Port %s out of range." % port
)
74 raise TypeError("Count %s out of range." % count
)
75 for flag
, count
in needFlags
:
77 if flag
not in ["stable", "running",]:
78 raise TypeError("Unsupported flag %s" % flag
)
80 raise TypeError("Count %s out of range." % count
)
82 self
.needPorts
= needPorts
[:]
83 self
.needFlags
= [(flag
.lower(), count
) for flag
, count
in needFlags
[:]]
86 class BridgeRing(object):
87 """Arranges bridges into a hashring based on an hmac function."""
89 def __init__(self
, key
, answerParameters
=None):
90 """Create a new BridgeRing, using key as its hmac key.
93 :param key: The HMAC key, generated with
94 :func:`~bridgedb.crypto.getKey`.
95 :type answerParameters: :class:`BridgeRingParameters`
96 :param answerParameters: DOCDOC
97 :ivar dict bridges: A dictionary which maps HMAC keys to
98 :class:`~bridgedb.bridges.Bridge`s.
99 :ivar dict bridgesByID: A dictionary which maps raw hash digests of
100 bridge ID keys to :class:`~bridgedb.bridges.Bridge`s.
102 :ivar hmac: An HMAC function, which uses the **key** parameter to
103 generate new HMACs for storing, inserting, and retrieving
104 :class:`~bridgedb.bridges.Bridge`s within mappings.
105 :ivar bool isSorted: ``True`` if ``sortedKeys`` is currently sorted.
106 :ivar list sortedKeys: A sorted list of all of the HMACs.
107 :ivar str name: A string which identifies this hashring, used mostly
108 for differentiating this hashring in log messages, but it is also
109 used for naming subrings. If this hashring is a subring, the
110 ``name`` will include whatever distinguishing parameters
111 differentiate that particular subring (i.e. ``'(port-443
112 subring)'`` or ``'(Stable subring)'``)
114 :ivar subrings: A list of other ``BridgeRing``s, each of which
115 contains bridges of a particular type. For example, a subring
116 might contain only ``Bridge``s which have been given the "Stable"
117 flag, or it might contain only IPv6 bridges. Each item in this
118 list should be a 4-tuple::
120 (type, value, count, ring)
124 - ``type`` is a string which describes what kind of parameter is
125 used to determine if a ``Bridge`` belongs in that subring,
126 i.e. ``'port'`` or ``'flag'``.
128 - ``value`` is a specific value pertaining to the ``type``,
129 e.g. ``type='port'; value=443``.
131 - ``count`` is an integer for the current total number of
132 bridges in the subring.
134 - ``ring`` is a :class:`BridgeRing`; it is the subhashring which
135 contains ``count`` number of
136 :class:`~bridgedb.bridges.Bridge`s of a certain ``type``.
139 self
.bridgesByID
= {}
140 self
.hmac
= getHMACFunc(key
, hex=False)
141 self
.isSorted
= False
143 if answerParameters
is None:
144 answerParameters
= BridgeRingParameters()
145 self
.answerParameters
= answerParameters
148 for port
,count
in self
.answerParameters
.needPorts
:
149 #note that we really need to use the same key here, so that
150 # the mapping is in the same order for all subrings.
151 self
.subrings
.append( ('port',port
,count
,BridgeRing(key
,None)) )
152 for flag
,count
in self
.answerParameters
.needFlags
:
153 self
.subrings
.append( ('flag',flag
,count
,BridgeRing(key
,None)) )
157 def setName(self
, name
):
158 """Tag a unique name to this hashring for identification.
160 :param string name: The name for this hashring.
163 for tp
, val
, _
, subring
in self
.subrings
:
165 subring
.setName("%s (port-%s subring)" % (name
, val
))
167 subring
.setName("%s (%s subring)" % (name
, val
))
170 """Get the number of unique bridges this hashring contains."""
171 return len(self
.bridges
)
174 """Remove all bridges and mappings from this hashring and subrings."""
176 self
.bridgesByID
= {}
177 self
.isSorted
= False
180 for tp
, val
, count
, subring
in self
.subrings
:
183 def insert(self
, bridge
):
184 """Add a **bridge** to this hashring.
186 The bridge's position in the hashring is dependent upon the HMAC of
187 the raw hash digest of the bridge's ID key. The function used to
188 generate the HMAC, :ivar:`BridgeRing.hmac`, is unique to each
191 If the (presumably same) bridge is already at that determined position
192 in this hashring, replace the old one.
194 :type bridge: :class:`~bridgedb.Bridges.Bridge`
195 :param bridge: The bridge to insert into this hashring.
197 for tp
, val
, _
, subring
in self
.subrings
:
199 if val
== bridge
.orPort
:
200 subring
.insert(bridge
)
202 assert tp
== 'flag' and val
== 'stable'
203 if val
== 'stable' and bridge
.flags
.stable
:
204 subring
.insert(bridge
)
206 pos
= self
.hmac(bridge
.identity
)
207 if not pos
in self
.bridges
:
208 self
.sortedKeys
.append(pos
)
209 self
.isSorted
= False
210 self
.bridges
[pos
] = bridge
211 self
.bridgesByID
[bridge
.identity
] = bridge
212 logging
.debug("Adding %s to %s" % (bridge
.address
, self
.name
))
215 """Helper: put the keys in sorted order."""
216 if not self
.isSorted
:
217 self
.sortedKeys
.sort()
220 def _getBridgeKeysAt(self
, pos
, N
=1):
221 """Bisect a list of bridges at a specified position, **pos**, and
222 retrieve bridges from that point onwards, wrapping around the hashring
225 If the number of bridges requested, **N**, is larger that the size of
226 this hashring, return the entire ring. Otherwise:
228 1. Sort this bridges in this hashring, if it is currently unsorted.
230 2. Bisect the sorted bridges. If the bridge at the desired position,
231 **pos**, already exists within this hashring, the the bisection
232 result is the bridge at position **pos**. Otherwise, the bisection
233 result is the first position after **pos** which has a bridge
236 3. Try to obtain **N** bridges, starting at (and including) the
237 bridge in the requested position, **pos**.
239 a. If there aren't **N** bridges after **pos**, wrap back
240 around to the beginning of the hashring and obtain bridges
241 until we have **N** bridges.
243 4. Check that the number of bridges obtained is indeed **N**, then
246 :param bytes pos: The position to jump to. Any bridges returned will
247 start at this position in the hashring, if there is a bridge
248 assigned to that position. Otherwise, indexing will start at the
249 first position after this one which has a bridge assigned to it.
250 :param int N: The number of bridges to return.
252 :returns: A list of :class:`~bridgedb.Bridges.Bridge`s.
254 assert len(pos
) == DIGEST_LEN
255 if N
>= len(self
.sortedKeys
):
256 return self
.sortedKeys
257 if not self
.isSorted
:
259 idx
= bisect
.bisect_left(self
.sortedKeys
, pos
)
260 r
= self
.sortedKeys
[idx
:idx
+N
]
262 # wrap around as needed.
263 r
.extend(self
.sortedKeys
[:N
- len(r
)])
267 def filterDistinctSubnets(self
, fingerprints
):
268 """Given a chosen set of ``fingerprints`` of bridges to distribute,
269 filter the bridges such that they are in distinct subnets.
271 logging
.debug("Got %d possible bridges to filter" % len(fingerprints
))
276 for fingerprint
in fingerprints
:
277 bridge
= self
.bridges
[fingerprint
]
280 # HOTFIX for https://bugs.torproject.org/26150
281 if not bridge
.address
:
282 logging
.error("Got strange bridge with no address field set: %s"
283 % toHex(fingerprint
))
286 for subnet
in subnets
:
287 if bridge
.address
in subnet
:
290 ("Skipping distribution of bridge %s in a subnet which "
291 "contains another bridge we're already distributing")
297 bridges
.append(bridge
)
298 if bridge
.address
.version
== 4:
299 cidr
= str(bridge
.address
) + "/16"
301 cidr
= str(bridge
.address
) + "/32"
302 subnets
.append(ipaddr
.IPNetwork(cidr
))
306 def getBridges(self
, pos
, N
=1, filterBySubnet
=False):
307 """Return **N** bridges appearing in this hashring after a position.
309 :param bytes pos: The position to jump to. Any bridges returned will
310 start at this position in the hashring, if there is a bridge
311 assigned to that position. Otherwise, indexing will start at the
312 first position after this one which has a bridge assigned to it.
313 :param int N: The number of bridges to return.
315 :returns: A list of :class:`~bridgedb.bridges.Bridge`s.
318 for _
, _
, count
, subring
in self
.subrings
:
319 if len(subring
) < count
:
321 forced
.extend(subring
._getBridgeKeysAt
(pos
, count
))
325 # Oversample double the number we need, in case we need to
326 # filter them and some are within the same subnet.
327 for k
in forced
+ self
._getBridgeKeysAt
(pos
, N
+ N
):
332 "Got duplicate bridge %r in main hashring for position %r."
333 % (logSafely(binascii
.hexlify(k
).decode('utf-8')), binascii
.hexlify(pos
).decode('utf-8')))
337 bridges
= self
.filterDistinctSubnets(keys
)
339 bridges
= [self
.bridges
[k
] for k
in keys
]
341 bridges
= bridges
[:N
]
342 logging
.debug("Caller asked for N=%d, filterBySubnet=%s bridges. "
343 "Returning %d bridges." % (N
, filterBySubnet
, len(bridges
)))
347 def getBridgeByID(self
, fp
):
348 """Return the bridge whose identity digest is fp, or None if no such
350 for _
,_
,_
,subring
in self
.subrings
:
351 b
= subring
.getBridgeByID(fp
)
355 return self
.bridgesByID
.get(fp
)
357 def dumpAssignments(self
, f
, description
=""):
358 logging
.info("Dumping bridge assignments for %s..." % self
.name
)
359 for b
in self
.bridges
.values():
360 desc
= [ description
]
361 for tp
,val
,_
,subring
in self
.subrings
:
362 if subring
.getBridgeByID(b
.identity
):
363 desc
.append("%s=%s"%(tp
,val
))
364 f
.write("%s %s\n" % (b
.fingerprint
, " ".join(desc
).strip()))
367 class FixedBridgeSplitter(object):
368 """Splits bridges up based on an HMAC and assigns them to one of several
369 subhashrings with equal probability.
371 def __init__(self
, key
, rings
):
372 self
.hmac
= getHMACFunc(key
, hex=True)
373 self
.rings
= rings
[:]
375 def insert(self
, bridge
):
376 # Grab the first 4 bytes
377 digest
= self
.hmac(bridge
.identity
)
378 pos
= int( digest
[:8], 16 )
379 which
= pos
% len(self
.rings
)
380 self
.rings
[which
].insert(bridge
)
383 """Clear all bridges from every ring in ``rings``."""
388 """Returns the total number of bridges in all ``rings``."""
390 for ring
in self
.rings
:
394 def dumpAssignments(self
, filename
, description
=""):
395 """Write all bridges assigned to this hashring to ``filename``.
397 :param string description: If given, include a description next to the
398 index number of the ring from :attr:`FilteredBridgeSplitter.rings`
399 the following bridges were assigned to. For example, if the
400 description is ``"IPv6 obfs2 bridges"`` the line would read:
401 ``"IPv6 obfs2 bridges ring=3"``.
403 for index
, ring
in zip(range(len(self
.rings
)), self
.rings
):
404 ring
.dumpAssignments(filename
, "%s ring=%s" % (description
, index
))
407 class UnallocatedHolder(object):
408 """A pseudo-bridgeholder that ignores its bridges and leaves them
412 self
.fingerprints
= []
414 def insert(self
, bridge
):
415 logging
.debug("Leaving %s unallocated", bridge
.fingerprint
)
416 if not bridge
.fingerprint
in self
.fingerprints
:
417 self
.fingerprints
.append(bridge
.fingerprint
)
420 return len(self
.fingerprints
)
423 self
.fingerprints
= []
425 def dumpAssignments(self
, f
, description
=""):
426 with bridgedb
.Storage
.getDB() as db
:
427 allBridges
= db
.getAllBridges()
428 for bridge
in allBridges
:
429 if bridge
.hex_key
not in self
.fingerprints
:
431 dist
= bridge
.distributor
432 desc
= [ description
]
433 if dist
!= "unallocated":
435 f
.write("%s %s\n" % (bridge
.hex_key
, " ".join(desc
).strip()))
438 class BridgeSplitter(object):
439 """Splits incoming bridges up based on an HMAC, and assigns them to
440 sub-bridgeholders with different probabilities. Bridge ←→ BridgeSplitter
441 associations are recorded in a store.
443 def __init__(self
, key
):
444 self
.hmac
= getHMACFunc(key
, hex=True)
445 self
.ringsByName
= {}
449 self
.statsHolders
= []
453 for r
in self
.ringsByName
.values():
457 def addRing(self
, ring
, ringname
, p
=1):
458 """Add a new subring.
460 :param ring: The subring to add.
461 :param str ringname: This is used to record which bridges have been
462 assigned where in the store.
463 :param int p: The relative proportion of bridges to assign to this
466 self
.ringsByName
[ringname
] = ring
467 self
.pValues
.append(self
.totalP
)
468 self
.rings
.append(ringname
)
471 def addTracker(self
, t
):
472 """Adds a statistics tracker that gets told about every bridge we see.
474 self
.statsHolders
.append(t
)
477 for r
in self
.ringsByName
.values():
480 def insert(self
, bridge
):
483 for s
in self
.statsHolders
:
486 # The bridge must be running to insert it:
487 if not bridge
.flags
.running
:
490 validRings
= self
.rings
491 distribution_method
= None
493 # If the bridge already has a distributor, use that.
494 with bridgedb
.Storage
.getDB() as db
:
495 distribution_method
= db
.getBridgeDistributor(bridge
, validRings
)
497 if distribution_method
:
498 logging
.info("%s bridge %s was already in hashring %s" %
499 (self
.__class
__.__name
__, bridge
, distribution_method
))
501 # Check if the bridge requested a specific distribution method.
502 if bridge
.distribution_request
:
503 distribution_method
= bridge
.distribution_request
504 logging
.info("%s bridge %s requested placement in hashring %s"
505 % (self
.__class
__.__name
__, bridge
,
506 distribution_method
))
508 # If they requested not to be distributed, honor the request:
509 if distribution_method
== "none":
510 logging
.info("%s bridge %s requested to not be distributed."
511 % (self
.__class
__.__name
__, bridge
))
514 # If we didn't know what they are talking about, or they requested
515 # "any" distribution method, and we've never seen this bridge
516 # before, then determine where to place it.
517 if ((distribution_method
not in validRings
) or
518 (distribution_method
== "any")):
520 pos
= self
.hmac(bridge
.identity
)
521 n
= int(pos
[:8], 16) % self
.totalP
522 pos
= bisect
.bisect_right(self
.pValues
, n
) - 1
523 assert 0 <= pos
< len(self
.rings
)
524 distribution_method
= self
.rings
[pos
]
525 logging
.info(("%s placing bridge %s into hashring %s (via n=%s,"
526 " pos=%s).") % (self
.__class
__.__name
__, bridge
,
527 distribution_method
, n
, pos
))
529 with bridgedb
.Storage
.getDB() as db
:
530 ringname
= db
.insertBridgeAndGetRing(bridge
, distribution_method
,
531 time
.time(), validRings
)
534 ring
= self
.ringsByName
.get(ringname
)
538 logging
.warn("Couldn't recognise ring named: '%s'" % ringname
)
539 logging
.info("Current rings: %s" % " ".join(self
.ringsByName
))
541 def dumpAssignments(self
, f
, description
=""):
542 for name
,ring
in self
.ringsByName
.items():
543 ring
.dumpAssignments(f
, "%s %s" % (description
, name
))
546 class FilteredBridgeSplitter(object):
547 """Places bridges into subrings based upon sets of filters.
549 The set of subrings and conditions used to assign :class:`Bridge`s should
550 be passed to :meth:`~FilteredBridgeSplitter.addRing`.
553 def __init__(self
, key
, max_cached_rings
=3):
554 """Create a hashring which filters bridges into sub hashrings.
557 :param key: An HMAC key.
558 :param int max_cached_rings: XXX max_cached_rings appears to not be
561 :ivar filterRings: A dictionary of subrings which has the form
562 ``{ringname: (filterFn, subring)}``, where:
563 - ``ringname`` is a unique string identifying the subring.
564 - ``filterFn`` is a callable which filters Bridges in some
565 manner, i.e. by whether they are IPv4 or IPv6, etc.
566 - ``subring`` is any of the horribly-implemented,
567 I-guess-it-passes-for-some-sort-of-hashring classes in this
570 :ivar bridges: DOCDOC
571 :type distributorName: str
572 :ivar distributorName: The name of this splitter's distributor. See
573 :meth:`~bridgedb.distributors.https.distributor.HTTPSDistributor.setDistributorName`.
576 self
.filterRings
= {}
577 self
.hmac
= getHMACFunc(key
, hex=True)
579 self
.distributorName
= ''
582 self
.max_cached_rings
= max_cached_rings
585 return len(self
.bridges
)
589 self
.filterRings
= {}
591 def insert(self
, bridge
):
592 """Insert a bridge into all appropriate sub-hashrings.
594 For all sub-hashrings, the ``bridge`` will only be added iff it passes
595 the filter functions for that sub-hashring.
597 :type bridge: :class:`~bridgedb.bridges.Bridge`
598 :param bridge: The bridge to add.
600 # The bridge must be running to insert it:
601 if not bridge
.flags
.running
:
602 logging
.warn(("Skipping hashring insertion for non-running "
603 "bridge: %s") % bridge
)
607 logging
.debug("Inserting %s into hashring..." % bridge
)
608 for old_bridge
in self
.bridges
[:]:
609 if bridge
.fingerprint
== old_bridge
.fingerprint
:
610 self
.bridges
[index
] = bridge
614 self
.bridges
.append(bridge
)
615 for ringname
, (filterFn
, subring
) in self
.filterRings
.items():
617 subring
.insert(bridge
)
618 logging
.debug("Inserted bridge %s into %s subhashring." %
621 def extractFilterNames(self
, ringname
):
622 """Get the names of the filters applied to a particular sub hashring.
624 :param str ringname: A unique name identifying a sub hashring.
626 :returns: A sorted list of strings, all the function names of the
627 filters applied to the sub hashring named **ringname**.
631 for filterName
in [x
.__name
__ for x
in list(ringname
)]:
632 # Using `assignBridgesToSubring.__name__` gives us a messy
633 # string which includes all parameters and memory addresses. Get
634 # rid of this by partitioning at the first `(`:
635 realFilterName
= filterName
.partition('(')[0]
636 filterNames
.append(realFilterName
)
641 def addRing(self
, subring
, ringname
, filterFn
, populate_from
=None):
642 """Add a subring to this hashring.
644 :param subring: The subring to add.
645 :param str ringname: A unique name for identifying the new subring.
646 :param filterFn: A function whose input is a :class:`Bridge`, and
647 returns True/False based on some filtration criteria.
648 :type populate_from: iterable or None
649 :param populate_from: A group of :class:`Bridge`s. If given, the newly
650 added subring will be populated with these bridges.
652 :returns: False if there was a problem adding the subring, True
655 # XXX I think subring and ringname are switched in this function, or
656 # at least that whatever is passed into this function as as the
657 # `ringname` parameter from somewhere else is odd; for example, with
658 # the original code, which was `log.debug("Inserted %d bridges into
659 # hashring '%s'!" % (inserted, ringname))`, this log message appears:
661 # Jan 04 23:18:37 [INFO] Inserted 12 bridges into hashring
662 # frozenset([<function byIPv4 at 0x2d67cf8>, <function
663 # assignBridgesToSubring(<function hmac_fn at 0x3778398>, 4, 0) at
666 # I suppose since it contains memory addresses, it *is* technically
667 # likely to be a unique string, but it is messy.
669 if ringname
in self
.filterRings
.keys():
670 logging
.fatal("%s hashring already has a subring named '%s'!"
671 % (self
.distributorName
, ringname
))
674 filterNames
= self
.extractFilterNames(ringname
)
675 subringName
= [self
.distributorName
]
677 for filterName
in filterNames
:
678 if filterName
.startswith('assignBridgesToSubring'):
679 subringNumber
= filterName
.lstrip('assignBridgesToSubring')
681 subringName
.append(filterName
.lstrip('by'))
682 if subring
.name
and 'Proxy' in subring
.name
:
683 subringName
.append('Proxy')
685 subringName
.append(subringNumber
)
686 subringName
= '-'.join([x
for x
in subringName
])
687 subring
.setName(subringName
)
689 logging
.info("Adding %s subring %s to the %s Distributor's hashring..." %
690 (subring
.name
, subringNumber
, self
.distributorName
))
691 logging
.info(" Subring filters: %s" % filterNames
)
693 #TODO: drop LRU ring if len(self.filterRings) > self.max_cached_rings
694 self
.filterRings
[ringname
] = (filterFn
, subring
)
698 for bridge
in populate_from
:
699 if isinstance(bridge
, Bridge
) and filterFn(bridge
):
700 subring
.insert(bridge
)
702 logging
.info("Bridges inserted into %s subring %s: %d"
703 % (subring
.name
, subringNumber
, inserted
))
707 def dumpAssignments(self
, f
, description
=""):
708 # one ring per filter set
709 # bridges may be present in multiple filter sets
710 # only one line should be dumped per bridge
712 for b
in self
.bridges
:
713 # gather all the filter descriptions
715 for n
,(g
,r
) in self
.filterRings
.items():
717 # ghetto. get subring flags, ports
718 for tp
,val
,_
,subring
in r
.subrings
:
719 if subring
.getBridgeByID(b
.identity
):
720 desc
.append("%s=%s"%(tp
,val
))
722 desc
.extend(g
.description
.split())
724 desc
.append(g
.description
)
727 logging
.debug("%s supports %d transports" % (b
, len(b
.transports
)))
728 for transport
in b
.transports
:
729 desc
.append("transport=%s"%(transport
.methodname
))
737 grouped
[l
] = "%s,%s"%(grouped
[l
],r
)
742 desc
= "%s %s" % (description
.strip(),
743 " ".join([v
for k
,v
in grouped
.items()]).strip())
744 f
.write("%s %s\n" % (b
.fingerprint
, desc
))