Re-flow indentation of bugfix items.
[tor-bridgedb.git] / bridgedb / main.py
blob70001a907a3f91c57248dc024277bef9c24e90f7
1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_Main -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: please see the AUTHORS file for attributions
6 # :copyright: (c) 2013-2017, Isis Lovecruft
7 # (c) 2013-2017, Matthew Finkel
8 # (c) 2007-2017, Nick Mathewson
9 # (c) 2007-2017, The Tor Project, Inc.
10 # :license: see LICENSE for licensing information
12 """This module sets up BridgeDB and starts the servers running."""
14 import logging
15 import os
16 import signal
17 import sys
18 import time
20 from twisted.internet import reactor
21 from twisted.internet import task
23 from bridgedb import crypto
24 from bridgedb import persistent
25 from bridgedb import proxy
26 from bridgedb import runner
27 from bridgedb import util
28 from bridgedb import metrics
29 from bridgedb import antibot
30 from bridgedb.bridges import MalformedBridgeInfo
31 from bridgedb.bridges import MissingServerDescriptorDigest
32 from bridgedb.bridges import ServerDescriptorDigestMismatch
33 from bridgedb.bridges import ServerDescriptorWithoutNetworkstatus
34 from bridgedb.bridges import Bridge
35 from bridgedb.configure import loadConfig
36 from bridgedb.distributors.email.distributor import EmailDistributor
37 from bridgedb.distributors.https.distributor import HTTPSDistributor
38 from bridgedb.distributors.moat.distributor import MoatDistributor
39 from bridgedb.parse import descriptors
40 from bridgedb.parse.blacklist import parseBridgeBlacklistFile
42 import bridgedb.Storage
44 from bridgedb import bridgerings
45 from bridgedb.Stability import updateBridgeHistory
def expandBridgeAuthDir(authdir, filename):
    """Expand a descriptor ``filename`` relative to which of the
    BRIDGE_AUTHORITY_DIRECTORIES, ``authdir``, it resides within.

    :param str authdir: The bridge authority directory which the descriptor
        file should live underneath.
    :param str filename: A descriptor filename, either absolute or relative
        to **authdir**.
    :rtype: str
    :returns: **filename** unchanged when it is already an absolute path
        which contains **authdir**; otherwise, the absolute, user-expanded
        path of **filename** anchored underneath **authdir**.
    """
    path = filename

    # Only re-anchor the filename underneath the authority directory when it
    # isn't already an absolute path inside that directory.
    if authdir not in filename or not os.path.isabs(filename):
        path = os.path.abspath(os.path.expanduser(os.sep.join([authdir, filename])))

    return path
def writeAssignments(hashring, filename):
    """Append the current bridge pool assignments to a file on disk.

    :type hashring: A :class:`~bridgedb.bridgerings.BridgeSplitter`
    :param hashring: A class which takes an HMAC key and splits bridges
        into their hashring assignments.
    :param str filename: The filename to write the assignments to.
    """
    logging.debug("Dumping pool assignments to file: '%s'" % filename)

    # One timestamped header line precedes each dump; the file is opened in
    # append mode so successive dumps accumulate.
    header = "bridge-pool-assignment %s\n" % time.strftime("%Y-%m-%d %H:%M:%S")
    try:
        with open(filename, 'a') as outfile:
            outfile.write(header)
            hashring.dumpAssignments(outfile)
    except IOError:
        logging.info("I/O error while writing assignments to: '%s'" % filename)
def writeMetrics(filename, measurementInterval):
    """Write our current usage metrics out to disk.

    :param str filename: The filename to write the metrics to.
    :param int measurementInterval: The number of seconds after which we rotate
        and dump our metrics.
    """
    logging.debug("Dumping metrics to file: '%s'" % filename)

    # The file is truncated on every dump: only the latest metrics survive.
    try:
        with open(filename, 'w') as metricsFile:
            metrics.export(metricsFile, measurementInterval)
    except IOError as err:
        logging.error("Failed to write metrics to '%s': %s" % (filename, err))
def load(state, hashring, clear=False):
    """Read and parse all descriptors, and load into a bridge hashring.

    Read all the appropriate bridge files from the saved
    :class:`~bridgedb.persistent.State`, parse and validate them, and then
    store them into our ``state.hashring`` instance. The ``state`` will be
    saved again at the end of this function.

    :type state: :class:`~bridgedb.persistent.State`
    :param state: The current persistent state, which holds the descriptor
        filenames and other settings taken from the config file.
    :type hashring: :class:`~bridgedb.bridgerings.BridgeSplitter`
    :param hashring: A class which provides a mechanism for HMACing
        Bridges in order to assign them to hashrings.
    :param boolean clear: If True, clear all previous bridges from the
        hashring before parsing for new ones.
    """
    if not state:
        logging.fatal("bridgedb.main.load() could not retrieve state!")
        sys.exit(2)

    if clear:
        logging.info("Clearing old bridges...")
        hashring.clear()

    logging.info("Loading bridges...")

    ignoreNetworkstatus = state.IGNORE_NETWORKSTATUS
    if ignoreNetworkstatus:
        logging.info("Ignoring BridgeAuthority networkstatus documents.")

    # Each bridge authority directory is processed independently: bridges and
    # timestamps are re-collected per authority.
    for auth in state.BRIDGE_AUTHORITY_DIRECTORIES:
        logging.info("Processing descriptors in %s directory..." % auth)

        bridges = {}      # fingerprint -> Bridge, built from the networkstatus
        timestamps = {}   # fingerprint -> list of published times (see below)

        fn = expandBridgeAuthDir(auth, state.STATUS_FILE)
        logging.info("Opening networkstatus file: %s" % fn)
        networkstatuses = descriptors.parseNetworkStatusFile(fn)
        logging.debug("Closing networkstatus file: %s" % fn)

        # First pass: create a Bridge for every networkstatus entry which
        # passes assertOK(); malformed entries are logged and skipped.
        logging.info("Processing networkstatus descriptors...")
        for router in networkstatuses:
            bridge = Bridge()
            bridge.updateFromNetworkStatus(router, ignoreNetworkstatus)
            try:
                bridge.assertOK()
            except MalformedBridgeInfo as error:
                logging.warn(str(error))
            else:
                bridges[bridge.fingerprint] = bridge

        # Second pass: fold the server descriptors into the bridges found in
        # the networkstatus above.
        for filename in state.BRIDGE_FILES:
            fn = expandBridgeAuthDir(auth, filename)
            logging.info("Opening bridge-server-descriptor file: '%s'" % fn)
            serverdescriptors = descriptors.parseServerDescriptorsFile(fn)
            logging.debug("Closing bridge-server-descriptor file: '%s'" % fn)

            for router in serverdescriptors:
                try:
                    bridge = bridges[router.fingerprint]
                except KeyError:
                    logging.warn(
                        ("Received server descriptor for bridge '%s' which wasn't "
                         "in the networkstatus!") % router.fingerprint)
                    # When ignoring the networkstatus, a descriptor without a
                    # networkstatus entry still gets a fresh Bridge; otherwise
                    # it is dropped.
                    if ignoreNetworkstatus:
                        bridge = Bridge()
                    else:
                        continue

                try:
                    bridge.updateFromServerDescriptor(router, ignoreNetworkstatus)
                except (ServerDescriptorWithoutNetworkstatus,
                        MissingServerDescriptorDigest,
                        ServerDescriptorDigestMismatch) as error:
                    logging.warn(str(error))
                    # Reject any routers whose server descriptors didn't pass
                    # :meth:`~bridges.Bridge._checkServerDescriptor`, i.e. those
                    # bridges who don't have corresponding networkstatus
                    # documents, or whose server descriptor digests don't check
                    # out:
                    bridges.pop(router.fingerprint)
                    continue

                if state.COLLECT_TIMESTAMPS:
                    # Update timestamps from server descriptors, not from network
                    # status descriptors (because networkstatus documents and
                    # descriptors aren't authenticated in any way):
                    if bridge.fingerprint in timestamps.keys():
                        timestamps[bridge.fingerprint].append(router.published)
                    else:
                        timestamps[bridge.fingerprint] = [router.published]

        # Third pass: merge in any extrainfo descriptors (e.g. transport info).
        eifiles = [expandBridgeAuthDir(auth, fn) for fn in state.EXTRA_INFO_FILES]
        extrainfos = descriptors.parseExtraInfoFiles(*eifiles)
        for fingerprint, router in extrainfos.items():
            try:
                bridges[fingerprint].updateFromExtraInfoDescriptor(router)
            except MalformedBridgeInfo as error:
                logging.warn(str(error))
            except KeyError as error:
                logging.warn(("Received extrainfo descriptor for bridge '%s', "
                              "but could not find bridge with that fingerprint.")
                             % router.fingerprint)

        blacklist = parseBridgeBlacklistFile(state.NO_DISTRIBUTION_FILE)

        inserted = 0
        logging.info("Trying to insert %d bridges into hashring, %d of which "
                     "have the 'Running' flag..." % (len(bridges),
                     len(list(filter(lambda b: b.flags.running, bridges.values())))))

        for fingerprint, bridge in bridges.items():
            # Skip insertion of bridges which are geolocated to be in one of the
            # NO_DISTRIBUTION_COUNTRIES, a.k.a. the countries we don't distribute
            # bridges from:
            if bridge.country in state.NO_DISTRIBUTION_COUNTRIES:
                logging.warn("Not distributing Bridge %s %s:%s in country %s!" %
                             (bridge, bridge.address, bridge.orPort, bridge.country))
            # Skip insertion of blacklisted bridges.
            elif bridge in blacklist.keys():
                logging.warn("Not distributing blacklisted Bridge %s %s:%s: %s" %
                             (bridge, bridge.address, bridge.orPort, blacklist[bridge]))
            else:
                # If the bridge is not running, then it is skipped during the
                # insertion process.
                hashring.insert(bridge)
                inserted += 1
        logging.info("Tried to insert %d bridges into hashring. Resulting "
                     "hashring is of length %d." % (inserted, len(hashring)))

        if state.COLLECT_TIMESTAMPS:
            # Bridge stability bookkeeping is done off the main thread.
            reactor.callInThread(updateBridgeHistory, bridges, timestamps)

    state.save()
227 def _reloadFn(*args):
228 """Placeholder callback function for :func:`_handleSIGHUP`."""
229 return True
def _handleSIGHUP(*_ignored):
    """Called when we receive a SIGHUP; invokes _reloadFn."""
    # Run the (possibly long) reload off the reactor thread so signal
    # handling returns promptly.
    reactor.callInThread(_reloadFn)
def replaceBridgeRings(current, replacement):
    """Replace the current thing with the new one

    Copies the ``hashring`` attribute from **replacement** onto
    **current**, swapping in freshly-loaded bridges on a live distributor.
    """
    freshRing = replacement.hashring
    current.hashring = freshRing
def createBridgeRings(cfg, proxyList, key):
    """Create the bridge distributors defined by the config file

    :type cfg: :class:`Conf`
    :param cfg: The current configuration, including any in-memory settings
        (i.e. settings whose values were not obtained from the config file,
        but were set via a function somewhere)
    :type proxyList: :class:`~bridgedb.proxy.ProxySet`
    :param proxyList: The container for the IP addresses of any currently
        known open proxies.
    :param bytes key: Hashring master key
    :rtype: tuple
    :returns: A :class:`~bridgedb.bridgerings.BridgeSplitter` hashring, an
        :class:`~bridgedb.distributors.email.distributor.EmailDistributor`
        or None, an
        :class:`~bridgedb.distributors.https.distributor.HTTPSDistributor`
        or None, and a
        :class:`~bridgedb.distributors.moat.distributor.MoatDistributor`
        or None — in that order (email before https, matching the actual
        return statement below).
    """
    # Create a BridgeSplitter to assign the bridges to the different
    # distributors.
    hashring = bridgerings.BridgeSplitter(crypto.getHMAC(key, "Hashring-Key"))
    logging.debug("Created hashring: %r" % hashring)

    # Create ring parameters.
    ringParams = bridgerings.BridgeRingParameters(needPorts=cfg.FORCE_PORTS,
                                                  needFlags=cfg.FORCE_FLAGS)

    # Each distributor stays None unless its config section enables it.
    emailDistributor = ipDistributor = moatDistributor = None

    # As appropriate, create a Moat distributor.
    if cfg.MOAT_DIST and cfg.MOAT_SHARE:
        logging.debug("Setting up Moat Distributor...")
        moatDistributor = MoatDistributor(
            cfg.MOAT_N_IP_CLUSTERS,
            crypto.getHMAC(key, "Moat-Dist-Key"),
            proxyList,
            answerParameters=ringParams)
        hashring.addRing(moatDistributor.hashring, "moat", cfg.MOAT_SHARE)

    # As appropriate, create an IP-based distributor.
    if cfg.HTTPS_DIST and cfg.HTTPS_SHARE:
        logging.debug("Setting up HTTPS Distributor...")
        ipDistributor = HTTPSDistributor(
            cfg.N_IP_CLUSTERS,
            crypto.getHMAC(key, "HTTPS-IP-Dist-Key"),
            proxyList,
            answerParameters=ringParams)
        hashring.addRing(ipDistributor.hashring, "https", cfg.HTTPS_SHARE)

    # As appropriate, create an email-based distributor.
    if cfg.EMAIL_DIST and cfg.EMAIL_SHARE:
        logging.debug("Setting up Email Distributor...")
        emailDistributor = EmailDistributor(
            crypto.getHMAC(key, "Email-Dist-Key"),
            cfg.EMAIL_DOMAIN_MAP.copy(),
            cfg.EMAIL_DOMAIN_RULES.copy(),
            answerParameters=ringParams,
            whitelist=cfg.EMAIL_WHITELIST.copy())
        hashring.addRing(emailDistributor.hashring, "email", cfg.EMAIL_SHARE)

    # As appropriate, tell the hashring to leave some bridges unallocated.
    if cfg.RESERVED_SHARE:
        hashring.addRing(bridgerings.UnallocatedHolder(),
                         "unallocated",
                         cfg.RESERVED_SHARE)

    return hashring, emailDistributor, ipDistributor, moatDistributor
def run(options, reactor=reactor):
    """This is BridgeDB's main entry point and main runtime loop.

    Given the parsed commandline options, this function handles locating the
    configuration file, loading and parsing it, and then either (re)parsing
    plus (re)starting the servers, or dumping bridge assignments to files.

    :type options: :class:`bridgedb.parse.options.MainOptions`
    :param options: A pre-parsed options class containing any arguments and
        options given in the commandline we were called with.
    :type state: :class:`bridgedb.persistent.State`
    :ivar state: A persistent state object which holds config changes.
    :param reactor: An implementer of
        :api:`twisted.internet.interfaces.IReactorCore`. This parameter is
        mainly for testing; the default
        :api:`twisted.internet.epollreactor.EPollReactor` is fine for normal
        application runs.
    """
    # Change to the directory where we're supposed to run. This must be done
    # before parsing the config file, otherwise there will need to be two
    # copies of the config file, one in the directory BridgeDB is started in,
    # and another in the directory it changes into.
    os.chdir(options['rundir'])
    if options['verbosity'] <= 10:  # Corresponds to logging.DEBUG
        print("Changed to runtime directory %r" % os.getcwd())

    config = loadConfig(options['config'])
    config.RUN_IN_DIR = options['rundir']

    # Set up logging as early as possible. We cannot import from the bridgedb
    # package any of our modules which import :mod:`logging` and start using
    # it, at least, not until :func:`safelog.configureLogging` is
    # called. Otherwise a default handler that logs to the console will be
    # created by the imported module, and all further calls to
    # :func:`logging.basicConfig` will be ignored.
    util.configureLogging(config)

    if options.subCommand is not None:
        runSubcommand(options, config)

    # Write the pidfile only after any options.subCommands are run (because
    # these exit when they are finished). Otherwise, if there is a subcommand,
    # the real PIDFILE would get overwritten with the PID of the temporary
    # bridgedb process running the subcommand.
    if config.PIDFILE:
        logging.debug("Writing server PID to file: '%s'" % config.PIDFILE)
        with open(config.PIDFILE, 'w') as pidfile:
            pidfile.write("%s\n" % os.getpid())
            pidfile.flush()

    # Let our pluggable transport class know what transports are resistant to
    # active probing. We need to know because we shouldn't hand out a
    # probing-vulnerable transport on a bridge that supports a
    # probing-resistant transport. See
    # <https://bugs.torproject.org/28655> for details.
    from bridgedb.bridges import PluggableTransport
    PluggableTransport.probing_resistant_transports = config.PROBING_RESISTANT_TRANSPORTS

    from bridgedb import persistent

    state = persistent.State(config=config)

    # These server modules are imported late (after configureLogging above)
    # to avoid the logging-configuration problem described earlier.
    from bridgedb.distributors.email.server import addServer as addSMTPServer
    from bridgedb.distributors.https.server import addWebServer
    from bridgedb.distributors.moat.server import addMoatServer

    # Load the master key, or create a new one.
    key = crypto.getKey(config.MASTER_KEY_FILE)
    proxies = proxy.ProxySet()
    emailDistributor = None
    ipDistributor = None
    moatDistributor = None

    # Save our state
    state.key = key
    state.save()

    def reload(inThread=True):  # pragma: no cover
        """Reload settings, proxy lists, and bridges.

        State should be saved before calling this method, and will be saved
        again at the end of it.

        The internal variables ``cfg`` and ``hashring`` are taken from a
        :class:`~bridgedb.persistent.State` instance, which has been saved to a
        statefile with :meth:`bridgedb.persistent.State.save`.

        :type cfg: :class:`Conf`
        :ivar cfg: The current configuration, including any in-memory
            settings (i.e. settings whose values were not obtained from the
            config file, but were set via a function somewhere)
        :type hashring: A :class:`~bridgedb.bridgerings.BridgeSplitter`
        :ivar hashring: A class which takes an HMAC key and splits bridges
            into their hashring assignments.
        """
        logging.debug("Caught SIGHUP")
        logging.info("Reloading...")

        logging.info("Loading saved state...")
        state = persistent.load()
        cfg = loadConfig(state.CONFIG_FILE, state.config)
        logging.info("Updating any changed settings...")
        state.useChangedSettings(cfg)

        level = getattr(state, 'LOGLEVEL', 'WARNING')
        logging.info("Updating log level to: '%s'" % level)
        level = getattr(logging, level)
        logging.getLogger().setLevel(level)

        logging.info("Reloading the list of open proxies...")
        for proxyfile in cfg.PROXY_LIST_FILES:
            logging.info("Loading proxies from: %s" % proxyfile)
            proxy.loadProxiesFromFile(proxyfile, proxies, removeStale=True)
        metrics.setProxies(proxies)

        # NOTE(review): the following reads from `config` (the outer closure
        # variable), not the freshly-reloaded `cfg` — confirm this is
        # intentional.
        logging.info("Reloading blacklisted request headers...")
        antibot.loadBlacklistedRequestHeaders(config.BLACKLISTED_REQUEST_HEADERS_FILE)
        logging.info("Reloading decoy bridges...")
        antibot.loadDecoyBridges(config.DECOY_BRIDGES_FILE)

        (hashring,
         emailDistributorTmp,
         ipDistributorTmp,
         moatDistributorTmp) = createBridgeRings(cfg, proxies, key)

        # Initialize our DB.
        bridgedb.Storage.initializeDBLock()
        bridgedb.Storage.setDBFilename(cfg.DB_FILE + ".sqlite")
        logging.info("Reparsing bridge descriptors...")
        load(state, hashring, clear=False)
        logging.info("Bridges loaded: %d" % len(hashring))

        if emailDistributorTmp is not None:
            emailDistributorTmp.prepopulateRings()  # create default rings
        else:
            logging.warn("No email distributor created!")

        if ipDistributorTmp is not None:
            ipDistributorTmp.prepopulateRings()  # create default rings
        else:
            logging.warn("No HTTP(S) distributor created!")

        if moatDistributorTmp is not None:
            moatDistributorTmp.prepopulateRings()
        else:
            logging.warn("No Moat distributor created!")

        # Dump bridge pool assignments to disk.
        writeAssignments(hashring, state.ASSIGNMENTS_FILE)
        state.save()

        if inThread:
            # XXX shutdown the distributors if they were previously running
            # and should now be disabled
            if moatDistributorTmp:
                reactor.callFromThread(replaceBridgeRings,
                                       moatDistributor, moatDistributorTmp)
            if ipDistributorTmp:
                reactor.callFromThread(replaceBridgeRings,
                                       ipDistributor, ipDistributorTmp)
            if emailDistributorTmp:
                reactor.callFromThread(replaceBridgeRings,
                                       emailDistributor, emailDistributorTmp)
        else:
            # We're still starting up. Return these distributors so
            # they are configured in the outer-namespace
            return emailDistributorTmp, ipDistributorTmp, moatDistributorTmp

    # Install the real reload handler in place of the placeholder, then hook
    # it up to SIGHUP.
    global _reloadFn
    _reloadFn = reload
    signal.signal(signal.SIGHUP, _handleSIGHUP)

    if reactor:  # pragma: no cover
        # And actually load it to start parsing. Get back our distributors.
        emailDistributor, ipDistributor, moatDistributor = reload(False)

        # Configure all servers:
        if config.MOAT_DIST and config.MOAT_SHARE:
            addMoatServer(config, moatDistributor)
        if config.HTTPS_DIST and config.HTTPS_SHARE:
            addWebServer(config, ipDistributor)
        if config.EMAIL_DIST and config.EMAIL_SHARE:
            addSMTPServer(config, emailDistributor)

        metrics.setSupportedTransports(config.SUPPORTED_TRANSPORTS)

        tasks = {}

        # Setup all our repeating tasks:
        if config.TASKS['GET_TOR_EXIT_LIST']:
            tasks['GET_TOR_EXIT_LIST'] = task.LoopingCall(
                proxy.downloadTorExits,
                proxies,
                config.SERVER_PUBLIC_EXTERNAL_IP)

        if config.TASKS.get('DELETE_UNPARSEABLE_DESCRIPTORS'):
            delUnparseableSecs = config.TASKS['DELETE_UNPARSEABLE_DESCRIPTORS']
        else:
            delUnparseableSecs = 24 * 60 * 60  # Default to 24 hours

        # We use the directory name of STATUS_FILE, since that directory
        # is where the *.unparseable descriptor files will be written to.
        tasks['DELETE_UNPARSEABLE_DESCRIPTORS'] = task.LoopingCall(
            runner.cleanupUnparseableDescriptors,
            os.path.dirname(config.STATUS_FILE), delUnparseableSecs)

        measurementInterval, _ = config.TASKS['EXPORT_METRICS']
        tasks['EXPORT_METRICS'] = task.LoopingCall(
            writeMetrics, state.METRICS_FILE, measurementInterval)

        # Schedule all configured repeating tasks:
        for name, value in config.TASKS.items():
            seconds, startNow = value
            if seconds:
                try:
                    # Set now to False to get the servers up and running when
                    # first started, rather than spend a bunch of time in
                    # scheduled tasks.
                    tasks[name].start(abs(seconds), now=startNow)
                except KeyError:
                    logging.info("Task %s is disabled and will not run." % name)
                else:
                    logging.info("Scheduled task %s to run every %s seconds."
                                 % (name, seconds))

    # Actually run the servers.
    try:
        if reactor and not reactor.running:
            logging.info("Starting reactors.")
            reactor.run()
    except KeyboardInterrupt:  # pragma: no cover
        logging.fatal("Received keyboard interrupt. Shutting down...")
    finally:
        if config.PIDFILE:
            os.unlink(config.PIDFILE)
        logging.info("Exiting...")
        sys.exit()
def runSubcommand(options, config):
    """Run a subcommand from the 'Commands' section of the bridgedb help menu.

    :type options: :class:`bridgedb.opt.MainOptions`
    :param options: A pre-parsed options class containing any arguments and
        options given in the commandline we were called with.
    :type config: :class:`bridgedb.main.Conf`
    :param config: The current configuration.
    :raises: :exc:`SystemExit` when all subCommands and subOptions have
        finished running.
    """
    # Make sure that the runner module is only imported after logging is set
    # up, otherwise we run into the same logging configuration problem as
    # mentioned above with the email.server and https.server.
    from bridgedb import runner

    if options.subCommand is not None:
        logging.debug("Running BridgeDB command: '%s'" % options.subCommand)

        if 'descriptors' in options.subOptions:
            runner.generateDescriptors(int(options.subOptions['descriptors']), config.RUN_IN_DIR)
        # Any recognized subcommand terminates the process once handled.
        sys.exit(0)