Avoid giving out bridges with broken tor versions.
[tor-bridgedb.git] / bridgedb / main.py
blob44d06682c7108d86d921259ff61a9e2191a90f86
1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_Main -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: please see the AUTHORS file for attributions
6 # :copyright: (c) 2013-2017, Isis Lovecruft
7 # (c) 2013-2017, Matthew Finkel
8 # (c) 2007-2017, Nick Mathewson
9 # (c) 2007-2017, The Tor Project, Inc.
10 # :license: see LICENSE for licensing information
12 """This module sets up BridgeDB and starts the servers running."""
14 import logging
15 import os
16 import signal
17 import sys
18 import time
20 from twisted.internet import reactor
21 from twisted.internet import task
23 from bridgedb import crypto
24 from bridgedb import persistent
25 from bridgedb import proxy
26 from bridgedb import runner
27 from bridgedb import util
28 from bridgedb import metrics
29 from bridgedb import antibot
30 from bridgedb.bridges import MalformedBridgeInfo
31 from bridgedb.bridges import MissingServerDescriptorDigest
32 from bridgedb.bridges import ServerDescriptorDigestMismatch
33 from bridgedb.bridges import ServerDescriptorWithoutNetworkstatus
34 from bridgedb.bridges import Bridge
35 from bridgedb.configure import loadConfig
36 from bridgedb.distributors.email.distributor import EmailDistributor
37 from bridgedb.distributors.https.distributor import HTTPSDistributor
38 from bridgedb.distributors.moat.distributor import MoatDistributor
39 from bridgedb.parse import descriptors
40 from bridgedb.parse.blacklist import parseBridgeBlacklistFile
41 from bridgedb.parse.versions import parseVersionsList
43 import bridgedb.Storage
45 from bridgedb import bridgerings
46 from bridgedb.Stability import updateBridgeHistory
def expandBridgeAuthDir(authdir, filename):
    """Expands a descriptor ``filename`` relative to which of the
    BRIDGE_AUTHORITY_DIRECTORIES, ``authdir``, it resides within.

    :param str authdir: The bridge authority directory the descriptor file
        belongs to.
    :param str filename: A descriptor filename, either relative or absolute.
    :rtype: str
    :returns: ``filename`` unchanged if it is already an absolute path that
        mentions ``authdir``; otherwise an absolute, user-expanded path
        rooted at ``authdir``.
    """
    path = filename

    # NOTE(review): this is a plain substring test, not a path-component
    # test, so e.g. authdir "auth" also matches "/tmp/authority/foo";
    # kept as-is for backward compatibility.  ``os.sep.join`` (rather than
    # os.path.join) is also deliberate: it preserves the historical result
    # when ``filename`` is absolute but names a different directory.
    if authdir not in filename or not os.path.isabs(filename):
        path = os.path.abspath(os.path.expanduser(os.sep.join([authdir, filename])))

    return path
def writeAssignments(hashring, filename):
    """Dump bridge distributor assignments to disk.

    Appends a timestamped ``bridge-pool-assignment`` header followed by the
    hashring's own assignment dump.  I/O failures are logged, never raised.

    :type hashring: A :class:`~bridgedb.bridgerings.BridgeSplitter`
    :ivar hashring: A class which takes an HMAC key and splits bridges
        into their hashring assignments.
    :param str filename: The filename to write the assignments to.
    """
    logging.debug("Dumping pool assignments to file: '%s'" % filename)

    header = "bridge-pool-assignment %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")
    try:
        # Append, so successive dumps accumulate in the same file.
        with open(filename, 'a') as fh:
            fh.write(header)
            hashring.dumpAssignments(fh)
    except IOError:
        logging.info("I/O error while writing assignments to: '%s'" % filename)
def writeMetrics(filename, measurementInterval):
    """Dump usage metrics to disk.

    Overwrites ``filename`` with the current metrics export; write failures
    are logged rather than propagated.

    :param str filename: The filename to write the metrics to.
    :param int measurementInterval: The number of seconds after which we
        rotate and dump our metrics.
    """
    # Lazy %-style logging args; the rendered message is unchanged.
    logging.debug("Dumping metrics to file: '%s'", filename)

    try:
        with open(filename, 'w') as fh:
            metrics.export(fh, measurementInterval)
    except IOError as err:
        logging.error("Failed to write metrics to '%s': %s", filename, err)
def load(state, hashring, clear=False):
    """Read and parse all descriptors, and load into a bridge hashring.

    Read all the appropriate bridge files from the saved
    :class:`~bridgedb.persistent.State`, parse and validate them, and then
    store them into our ``state.hashring`` instance. The ``state`` will be
    saved again at the end of this function.

    :type state: :class:`~bridgedb.persistent.State`
    :param state: The current persistent state, holding all configured file
        locations and settings.
    :type hashring: :class:`~bridgedb.bridgerings.BridgeSplitter`
    :param hashring: A class which provides a mechanism for HMACing
        Bridges in order to assign them to hashrings.
    :param boolean clear: If True, clear all previous bridges from the
        hashring before parsing for new ones.
    """
    if not state:
        logging.fatal("bridgedb.main.load() could not retrieve state!")
        sys.exit(2)

    if clear:
        logging.info("Clearing old bridges...")
        hashring.clear()

    logging.info("Loading bridges...")

    ignoreNetworkstatus = state.IGNORE_NETWORKSTATUS
    if ignoreNetworkstatus:
        logging.info("Ignoring BridgeAuthority networkstatus documents.")

    for auth in state.BRIDGE_AUTHORITY_DIRECTORIES:
        logging.info("Processing descriptors in %s directory..." % auth)

        bridges = {}
        timestamps = {}

        fn = expandBridgeAuthDir(auth, state.STATUS_FILE)
        logging.info("Opening networkstatus file: %s" % fn)
        networkstatuses = descriptors.parseNetworkStatusFile(fn)
        logging.debug("Closing networkstatus file: %s" % fn)

        # First pass: seed the bridges dict from the networkstatus; only
        # bridges which pass assertOK() are kept.
        logging.info("Processing networkstatus descriptors...")
        for router in networkstatuses:
            bridge = Bridge()
            bridge.updateFromNetworkStatus(router, ignoreNetworkstatus)
            try:
                bridge.assertOK()
            except MalformedBridgeInfo as error:
                logging.warn(str(error))
            else:
                bridges[bridge.fingerprint] = bridge

        # Second pass: fold in the server descriptors.
        for filename in state.BRIDGE_FILES:
            fn = expandBridgeAuthDir(auth, filename)
            logging.info("Opening bridge-server-descriptor file: '%s'" % fn)
            serverdescriptors = descriptors.parseServerDescriptorsFile(fn)
            logging.debug("Closing bridge-server-descriptor file: '%s'" % fn)

            for router in serverdescriptors:
                try:
                    bridge = bridges[router.fingerprint]
                except KeyError:
                    logging.warn(
                        ("Received server descriptor for bridge '%s' which wasn't "
                         "in the networkstatus!") % router.fingerprint)
                    if ignoreNetworkstatus:
                        bridge = Bridge()
                    else:
                        continue

                try:
                    bridge.updateFromServerDescriptor(router, ignoreNetworkstatus)
                except (ServerDescriptorWithoutNetworkstatus,
                        MissingServerDescriptorDigest,
                        ServerDescriptorDigestMismatch) as error:
                    logging.warn(str(error))
                    # Reject any routers whose server descriptors didn't pass
                    # :meth:`~bridges.Bridge._checkServerDescriptor`, i.e. those
                    # bridges who don't have corresponding networkstatus
                    # documents, or whose server descriptor digests don't check
                    # out.  Use pop(..., None) because a bridge created in the
                    # ignoreNetworkstatus branch above was never added to
                    # ``bridges`` and a bare pop() would raise KeyError here.
                    bridges.pop(router.fingerprint, None)
                    continue

                if state.COLLECT_TIMESTAMPS:
                    # Update timestamps from server descriptors, not from network
                    # status descriptors (because networkstatus documents and
                    # descriptors aren't authenticated in any way):
                    if bridge.fingerprint in timestamps:
                        timestamps[bridge.fingerprint].append(router.published)
                    else:
                        timestamps[bridge.fingerprint] = [router.published]

        # Third pass: fold in extra-info descriptors (transports, etc.).
        eifiles = [expandBridgeAuthDir(auth, fn) for fn in state.EXTRA_INFO_FILES]
        extrainfos = descriptors.parseExtraInfoFiles(*eifiles)
        for fingerprint, router in extrainfos.items():
            try:
                bridges[fingerprint].updateFromExtraInfoDescriptor(router)
            except MalformedBridgeInfo as error:
                logging.warn(str(error))
            except KeyError as error:
                logging.warn(("Received extrainfo descriptor for bridge '%s', "
                              "but could not find bridge with that fingerprint.")
                             % router.fingerprint)

        blacklist = parseBridgeBlacklistFile(state.NO_DISTRIBUTION_FILE)

        inserted = 0
        logging.info("Trying to insert %d bridges into hashring, %d of which "
                     "have the 'Running' flag..." % (len(bridges),
                     len(list(filter(lambda b: b.flags.running, bridges.values())))))

        for fingerprint, bridge in bridges.items():
            # Skip insertion of bridges which are geolocated to be in one of the
            # NO_DISTRIBUTION_COUNTRIES, a.k.a. the countries we don't distribute
            # bridges from:
            if bridge.country in state.NO_DISTRIBUTION_COUNTRIES:
                logging.warn("Not distributing Bridge %s %s:%s in country %s!" %
                             (bridge, bridge.address, bridge.orPort, bridge.country))
            # Skip insertion of blacklisted bridges.
            elif bridge in blacklist:
                logging.warn("Not distributing blacklisted Bridge %s %s:%s: %s" %
                             (bridge, bridge.address, bridge.orPort, blacklist[bridge]))
            # Skip bridges that are running a blacklisted version of Tor.
            elif bridge.runsVersion(state.BLACKLISTED_TOR_VERSIONS):
                # BUGFIX: previously logged ``router.fingerprint``, a stale
                # loop variable left over from the extrainfo loop above, which
                # named the wrong bridge in the log message.
                logging.warn("Not distributing bridge %s because it runs blacklisted "
                             "Tor version %s." % (bridge.fingerprint, bridge.software))
            else:
                # If the bridge is not running, then it is skipped during the
                # insertion process.
                hashring.insert(bridge)
                inserted += 1
        logging.info("Tried to insert %d bridges into hashring. Resulting "
                     "hashring is of length %d." % (inserted, len(hashring)))

        if state.COLLECT_TIMESTAMPS:
            # Stability bookkeeping is slow; do it off the reactor thread.
            reactor.callInThread(updateBridgeHistory, bridges, timestamps)

        state.save()
232 def _reloadFn(*args):
233 """Placeholder callback function for :func:`_handleSIGHUP`."""
234 return True
def _handleSIGHUP(*args):
    """Called when we receive a SIGHUP; invokes _reloadFn.

    The reload is dispatched to a reactor-managed thread so the signal
    handler itself returns immediately.

    :param args: The signal number and stack frame supplied by the signal
        machinery (ignored).
    """
    reactor.callInThread(_reloadFn)
def replaceBridgeRings(current, replacement):
    """Replace the current thing with the new one.

    Swaps the ``hashring`` attribute of ``current`` for the one belonging to
    ``replacement``, so a freshly-reloaded distributor's bridges take effect
    on the live distributor object without replacing the object itself.

    :param current: The distributor currently serving requests.
    :param replacement: A newly built distributor whose hashring should be
        adopted.
    """
    current.hashring = replacement.hashring
def createBridgeRings(cfg, proxyList, key):
    """Create the bridge distributors defined by the config file.

    :type cfg: :class:`Conf`
    :param cfg: The current configuration, including any in-memory settings
        (i.e. settings whose values were not obtained from the config file,
        but were set via a function somewhere)
    :type proxyList: :class:`~bridgedb.proxy.ProxySet`
    :param proxyList: The container for the IP addresses of any currently
        known open proxies.
    :param bytes key: Hashring master key
    :rtype: tuple
    :returns: A :class:`~bridgedb.bridgerings.BridgeSplitter` hashring, an
        :class:`~bridgedb.distributors.email.distributor.EmailDistributor`
        or None, an
        :class:`~bridgedb.distributors.https.distributor.HTTPSDistributor`
        or None, and a
        :class:`~bridgedb.distributors.moat.distributor.MoatDistributor`
        or None.
    """
    # The master splitter HMACs each bridge to decide which distributor's
    # sub-ring receives it.
    splitter = bridgerings.BridgeSplitter(crypto.getHMAC(key, "Hashring-Key"))
    logging.debug("Created hashring: %r" % splitter)

    # Port/flag constraints shared by every sub-ring.
    answerParams = bridgerings.BridgeRingParameters(needPorts=cfg.FORCE_PORTS,
                                                    needFlags=cfg.FORCE_FLAGS)

    moatDistributor = None
    ipDistributor = None
    emailDistributor = None

    # As appropriate, create a Moat distributor.
    if cfg.MOAT_DIST and cfg.MOAT_SHARE:
        logging.debug("Setting up Moat Distributor...")
        moatDistributor = MoatDistributor(
            cfg.MOAT_N_IP_CLUSTERS,
            crypto.getHMAC(key, "Moat-Dist-Key"),
            proxyList,
            answerParameters=answerParams)
        splitter.addRing(moatDistributor.hashring, "moat", cfg.MOAT_SHARE)

    # As appropriate, create an IP-based distributor.
    if cfg.HTTPS_DIST and cfg.HTTPS_SHARE:
        logging.debug("Setting up HTTPS Distributor...")
        ipDistributor = HTTPSDistributor(
            cfg.N_IP_CLUSTERS,
            crypto.getHMAC(key, "HTTPS-IP-Dist-Key"),
            proxyList,
            answerParameters=answerParams)
        splitter.addRing(ipDistributor.hashring, "https", cfg.HTTPS_SHARE)

    # As appropriate, create an email-based distributor.
    if cfg.EMAIL_DIST and cfg.EMAIL_SHARE:
        logging.debug("Setting up Email Distributor...")
        emailDistributor = EmailDistributor(
            crypto.getHMAC(key, "Email-Dist-Key"),
            cfg.EMAIL_DOMAIN_MAP.copy(),
            cfg.EMAIL_DOMAIN_RULES.copy(),
            answerParameters=answerParams,
            whitelist=cfg.EMAIL_WHITELIST.copy())
        splitter.addRing(emailDistributor.hashring, "email", cfg.EMAIL_SHARE)

    # As appropriate, tell the hashring to leave some bridges unallocated.
    if cfg.RESERVED_SHARE:
        splitter.addRing(bridgerings.UnallocatedHolder(),
                         "unallocated",
                         cfg.RESERVED_SHARE)

    return splitter, emailDistributor, ipDistributor, moatDistributor
def run(options, reactor=reactor):
    """This is BridgeDB's main entry point and main runtime loop.

    Given the parsed commandline options, this function handles locating the
    configuration file, loading and parsing it, and then either (re)parsing
    plus (re)starting the servers, or dumping bridge assignments to files.

    :type options: :class:`bridgedb.parse.options.MainOptions`
    :param options: A pre-parsed options class containing any arguments and
        options given in the commandline we were called with.
    :type state: :class:`bridgedb.persistent.State`
    :ivar state: A persistent state object which holds config changes.
    :param reactor: An implementer of
        :api:`twisted.internet.interfaces.IReactorCore`. This parameter is
        mainly for testing; the default
        :api:`twisted.internet.epollreactor.EPollReactor` is fine for normal
        application runs.
    """
    # Change to the directory where we're supposed to run. This must be done
    # before parsing the config file, otherwise there will need to be two
    # copies of the config file, one in the directory BridgeDB is started in,
    # and another in the directory it changes into.
    os.chdir(options['rundir'])
    if options['verbosity'] <= 10: # Corresponds to logging.DEBUG
        print("Changed to runtime directory %r" % os.getcwd())

    config = loadConfig(options['config'])
    config.RUN_IN_DIR = options['rundir']

    # Set up logging as early as possible. We cannot import from the bridgedb
    # package any of our modules which import :mod:`logging` and start using
    # it, at least, not until :func:`safelog.configureLogging` is
    # called. Otherwise a default handler that logs to the console will be
    # created by the imported module, and all further calls to
    # :func:`logging.basicConfig` will be ignored.
    util.configureLogging(config)

    # Subcommands call sys.exit() when finished, so nothing after this runs
    # when a subcommand was given.
    if options.subCommand is not None:
        runSubcommand(options, config)

    # Write the pidfile only after any options.subCommands are run (because
    # these exit when they are finished). Otherwise, if there is a subcommand,
    # the real PIDFILE would get overwritten with the PID of the temporary
    # bridgedb process running the subcommand.
    if config.PIDFILE:
        logging.debug("Writing server PID to file: '%s'" % config.PIDFILE)
        with open(config.PIDFILE, 'w') as pidfile:
            pidfile.write("%s\n" % os.getpid())
            pidfile.flush()

    # Let our pluggable transport class know what transports are resistant to
    # active probing. We need to know because we shouldn't hand out a
    # probing-vulnerable transport on a bridge that supports a
    # probing-resistant transport. See
    # <https://bugs.torproject.org/28655> for details.
    from bridgedb.bridges import PluggableTransport
    PluggableTransport.probing_resistant_transports = config.PROBING_RESISTANT_TRANSPORTS

    # Deferred imports: these modules use logging at import time, so they must
    # only be imported after util.configureLogging() above.
    from bridgedb import persistent

    state = persistent.State(config=config)

    from bridgedb.distributors.email.server import addServer as addSMTPServer
    from bridgedb.distributors.https.server import addWebServer
    from bridgedb.distributors.moat.server import addMoatServer

    # Load the master key, or create a new one.
    key = crypto.getKey(config.MASTER_KEY_FILE)
    proxies = proxy.ProxySet()
    emailDistributor = None
    ipDistributor = None
    moatDistributor = None

    # Save our state
    state.key = key
    state.save()

    def reload(inThread=True): # pragma: no cover
        """Reload settings, proxy lists, and bridges.

        State should be saved before calling this method, and will be saved
        again at the end of it.

        The internal variables ``cfg`` and ``hashring`` are taken from a
        :class:`~bridgedb.persistent.State` instance, which has been saved to a
        statefile with :meth:`bridgedb.persistent.State.save`.

        :type cfg: :class:`Conf`
        :ivar cfg: The current configuration, including any in-memory
            settings (i.e. settings whose values were not obtained from the
            config file, but were set via a function somewhere)
        :type hashring: A :class:`~bridgedb.bridgerings.BridgeSplitter`
        :ivar hashring: A class which takes an HMAC key and splits bridges
            into their hashring assignments.
        :param bool inThread: If True (the SIGHUP path), hand the rebuilt
            distributors over to the running ones via the reactor; if False
            (initial startup), return them to the caller instead.
        """
        logging.debug("Caught SIGHUP")
        logging.info("Reloading...")

        logging.info("Loading saved state...")
        state = persistent.load()
        cfg = loadConfig(state.CONFIG_FILE, state.config)
        logging.info("Updating any changed settings...")
        state.useChangedSettings(cfg)

        level = getattr(state, 'LOGLEVEL', 'WARNING')
        logging.info("Updating log level to: '%s'" % level)
        level = getattr(logging, level)
        logging.getLogger().setLevel(level)

        logging.info("Reloading the list of open proxies...")
        for proxyfile in cfg.PROXY_LIST_FILES:
            logging.info("Loading proxies from: %s" % proxyfile)
            proxy.loadProxiesFromFile(proxyfile, proxies, removeStale=True)
        metrics.setProxies(proxies)

        # Parse the version strings into comparable version objects once, so
        # load() can test bridges against them.
        state.BLACKLISTED_TOR_VERSIONS = parseVersionsList(state.BLACKLISTED_TOR_VERSIONS)

        logging.info("Reloading blacklisted request headers...")
        # NOTE(review): the two antibot calls below read the closed-over
        # ``config`` rather than the freshly reloaded ``cfg`` — confirm this
        # is intentional (these paths would not pick up config changes on
        # SIGHUP).
        antibot.loadBlacklistedRequestHeaders(config.BLACKLISTED_REQUEST_HEADERS_FILE)
        logging.info("Reloading decoy bridges...")
        antibot.loadDecoyBridges(config.DECOY_BRIDGES_FILE)

        (hashring,
         emailDistributorTmp,
         ipDistributorTmp,
         moatDistributorTmp) = createBridgeRings(cfg, proxies, key)

        # Initialize our DB.
        bridgedb.Storage.initializeDBLock()
        bridgedb.Storage.setDBFilename(cfg.DB_FILE + ".sqlite")
        logging.info("Reparsing bridge descriptors...")
        load(state, hashring, clear=False)
        logging.info("Bridges loaded: %d" % len(hashring))

        if emailDistributorTmp is not None:
            emailDistributorTmp.prepopulateRings() # create default rings
        else:
            logging.warn("No email distributor created!")

        if ipDistributorTmp is not None:
            ipDistributorTmp.prepopulateRings() # create default rings
        else:
            logging.warn("No HTTP(S) distributor created!")

        if moatDistributorTmp is not None:
            moatDistributorTmp.prepopulateRings()
        else:
            logging.warn("No Moat distributor created!")

        # Dump bridge pool assignments to disk.
        writeAssignments(hashring, state.ASSIGNMENTS_FILE)
        state.save()

        if inThread:
            # XXX shutdown the distributors if they were previously running
            # and should now be disabled
            if moatDistributorTmp:
                reactor.callFromThread(replaceBridgeRings,
                                       moatDistributor, moatDistributorTmp)
            if ipDistributorTmp:
                reactor.callFromThread(replaceBridgeRings,
                                       ipDistributor, ipDistributorTmp)
            if emailDistributorTmp:
                reactor.callFromThread(replaceBridgeRings,
                                       emailDistributor, emailDistributorTmp)
        else:
            # We're still starting up. Return these distributors so
            # they are configured in the outer-namespace
            return emailDistributorTmp, ipDistributorTmp, moatDistributorTmp

    # Install the real reload logic so SIGHUP (via _handleSIGHUP) triggers it.
    global _reloadFn
    _reloadFn = reload
    signal.signal(signal.SIGHUP, _handleSIGHUP)

    if reactor: # pragma: no cover
        # And actually load it to start parsing. Get back our distributors.
        emailDistributor, ipDistributor, moatDistributor = reload(False)

        # Configure all servers:
        if config.MOAT_DIST and config.MOAT_SHARE:
            addMoatServer(config, moatDistributor)
        if config.HTTPS_DIST and config.HTTPS_SHARE:
            addWebServer(config, ipDistributor)
        if config.EMAIL_DIST and config.EMAIL_SHARE:
            addSMTPServer(config, emailDistributor)

        metrics.setSupportedTransports(config.SUPPORTED_TRANSPORTS)

        tasks = {}

        # Setup all our repeating tasks:
        if config.TASKS['GET_TOR_EXIT_LIST']:
            tasks['GET_TOR_EXIT_LIST'] = task.LoopingCall(
                proxy.downloadTorExits,
                proxies,
                config.SERVER_PUBLIC_EXTERNAL_IP)

        if config.TASKS.get('DELETE_UNPARSEABLE_DESCRIPTORS'):
            delUnparseableSecs = config.TASKS['DELETE_UNPARSEABLE_DESCRIPTORS']
        else:
            delUnparseableSecs = 24 * 60 * 60 # Default to 24 hours

        # We use the directory name of STATUS_FILE, since that directory
        # is where the *.unparseable descriptor files will be written to.
        tasks['DELETE_UNPARSEABLE_DESCRIPTORS'] = task.LoopingCall(
            runner.cleanupUnparseableDescriptors,
            os.path.dirname(config.STATUS_FILE), delUnparseableSecs)

        measurementInterval, _ = config.TASKS['EXPORT_METRICS']
        tasks['EXPORT_METRICS'] = task.LoopingCall(
            writeMetrics, state.METRICS_FILE, measurementInterval)

        # Schedule all configured repeating tasks:
        for name, value in config.TASKS.items():
            seconds, startNow = value
            if seconds:
                try:
                    # Set now to False to get the servers up and running when
                    # first started, rather than spend a bunch of time in
                    # scheduled tasks.
                    tasks[name].start(abs(seconds), now=startNow)
                except KeyError:
                    # A TASKS entry with no LoopingCall built above (e.g.
                    # GET_TOR_EXIT_LIST when disabled) lands here.
                    logging.info("Task %s is disabled and will not run." % name)
                else:
                    logging.info("Scheduled task %s to run every %s seconds."
                                 % (name, seconds))

    # Actually run the servers.
    try:
        if reactor and not reactor.running:
            logging.info("Starting reactors.")
            reactor.run()
    except KeyboardInterrupt: # pragma: no cover
        logging.fatal("Received keyboard interrupt. Shutting down...")
    finally:
        # Clean up the pidfile and exit regardless of how the reactor stopped.
        if config.PIDFILE:
            os.unlink(config.PIDFILE)
        logging.info("Exiting...")
        sys.exit()
def runSubcommand(options, config):
    """Run a subcommand from the 'Commands' section of the bridgedb help menu.

    :type options: :class:`bridgedb.opt.MainOptions`
    :param options: A pre-parsed options class containing any arguments and
        options given in the commandline we were called with.
    :type config: :class:`bridgedb.main.Conf`
    :param config: The current configuration.
    :raises: :exc:`SystemExit` when all subCommands and subOptions have
        finished running.
    """
    # Make sure that the runner module is only imported after logging is set
    # up, otherwise we run into the same logging configuration problem as
    # mentioned above with the email.server and https.server.
    from bridgedb import runner

    # Guard clause: nothing to do when no subcommand was given.
    if options.subCommand is None:
        return

    logging.debug("Running BridgeDB command: '%s'" % options.subCommand)

    if 'descriptors' in options.subOptions:
        runner.generateDescriptors(int(options.subOptions['descriptors']), config.RUN_IN_DIR)

    sys.exit(0)