Make getCaptchaImage return (bytes, str).
[tor-bridgedb.git] / bridgedb / util.py
blob9572ad1c6a0740786b2f5771fabad2de8bef9d28
1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_util -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
6 # Matthew Finkel 0x017DD169EA793BE2 <sysrqb@torproject.org>
7 # :copyright: (c) 2013-2017, Isis Lovecruft
8 # (c) 2013-2017, Matthew Finkel
9 # (c) 2007-2017, The Tor Project, Inc.
10 # :license: 3-Clause BSD, see LICENSE for licensing information
12 """Common utilities for BridgeDB."""
14 from functools import partial
16 import abc
17 import logging
18 import logging.config
19 import logging.handlers
20 import os
21 import re
22 import time
24 from twisted.python import components
27 def _getLogHandlers(logToFile=True, logToStderr=True):
28 """Get the appropriate list of log handlers.
30 :param bool logToFile: If ``True``, add a logfile handler.
31 :param bool logToStderr: If ``True``, add a stream handler to stderr.
32 :rtype: list
33 :returns: A list containing the appropriate log handler names from the
34 :class:`logging.config.dictConfigClass`.
35 """
36 logHandlers = []
37 if logToFile:
38 logHandlers.append('rotating')
39 if logToStderr:
40 logHandlers.append('console')
41 return logHandlers
43 def _getRotatingFileHandler(filename, mode='a', maxBytes=1000000, backupCount=0,
44 encoding='utf-8', uid=None, gid=None):
45 """Get a :class:`logging.RotatingFileHandler` with a logfile which is
46 readable+writable only by the given **uid** and **gid**.
48 :param str filename: The full path to the log file.
49 :param str mode: The mode to open **filename** with. (default: ``'a'``)
50 :param int maxBytes: Rotate logfiles after they have grown to this size in
51 bytes.
52 :param int backupCount: The number of logfiles to keep in rotation.
53 :param str encoding: The encoding for the logfile.
54 :param int uid: The owner UID to set on the logfile.
55 :param int gid: The GID to set on the logfile.
56 :rtype: :class:`logging.handlers.RotatingFileHandler`
57 :returns: A logfile handler which will rotate files and chown/chmod newly
58 created files.
59 """
60 # Default to the current process owner's uid and gid:
61 uid = os.getuid() if not uid else uid
62 gid = os.getgid() if not gid else gid
64 if not os.path.exists(filename):
65 open(filename, 'a').close()
66 os.chown(filename, uid, gid)
67 try:
68 os.chmod(filename, os.ST_WRITE | os.ST_APPEND)
69 except AttributeError: # pragma: no cover
70 logging.error("""
71 XXX FIXME: Travis chokes on `os.ST_WRITE` saying that the module doesn't
72 have that attribute, for some reason:
73 https://travis-ci.org/isislovecruft/bridgedb/builds/24145963#L1601""")
74 os.chmod(filename, 384)
76 fileHandler = partial(logging.handlers.RotatingFileHandler,
77 filename,
78 mode,
79 maxBytes=maxBytes,
80 backupCount=backupCount,
81 encoding=encoding)
82 return fileHandler
def configureLogging(cfg):
    """Set up Python's logging subsystem based on the configuration.

    The following attributes are read from **cfg**, each falling back to a
    default if it is missing: ``SAFELOGGING`` (``True``), ``LOGLEVEL``
    (``'WARNING'``), ``LOG_TO_STDERR`` (``False``), ``LOGFILE``
    (``"bridgedb.log"``), ``LOGFILE_COUNT`` (``3``), ``LOGFILE_ROTATE_SIZE``
    (``10000000``), ``LOG_THREADS`` (``False``), ``LOG_TRACE`` (``False``),
    and ``LOG_TIME_FORMAT`` (``"%H:%M:%S"``).

    If ``LOGFILE`` is empty or ``None``, no logfile handler is configured
    and no logfile is created.

    :type cfg: :class:`~bridgedb.persistent.Conf`
    :param cfg: The current configuration, including any in-memory settings.
    """
    from bridgedb import safelog

    # Turn on safe logging by default:
    safelogging = getattr(cfg, 'SAFELOGGING', True)
    safelog.setSafeLogging(safelogging)

    level = getattr(cfg, 'LOGLEVEL', 'WARNING')
    # Translate the level name into the numeric constant from the logging
    # module; unknown names fall back to 0 (NOTSET).
    logLevel = getattr(logging, level, 0)
    logStderr = getattr(cfg, 'LOG_TO_STDERR', False)
    logfileName = getattr(cfg, 'LOGFILE', "bridgedb.log")
    # LOGFILE_COUNT counts the live logfile as well, whereas the handler's
    # backupCount only counts the rotated-out copies.
    logfileCount = getattr(cfg, 'LOGFILE_COUNT', 3) - 1
    logfileRotateSize = getattr(cfg, 'LOGFILE_ROTATE_SIZE', 10000000)
    logThreads = getattr(cfg, 'LOG_THREADS', False)
    logTrace = getattr(cfg, 'LOG_TRACE', False)
    logTimeFormat = getattr(cfg, 'LOG_TIME_FORMAT', "%H:%M:%S")

    logFilters = []
    if safelogging:
        logFilters = ['safelogEmail', 'safelogIPv4', 'safelogIPv6']

    handlers = {
        'console': {'class': 'logging.StreamHandler',
                    'level': logLevel,
                    'formatter': 'default',
                    'filters': logFilters},
    }
    # Only prepare the logfile handler when there is a logfile to write to:
    # _getRotatingFileHandler() creates/chowns/chmods the file immediately,
    # which fails for an empty or ``None`` filename.
    if logfileName:
        handlers['rotating'] = {'()': _getRotatingFileHandler(logfileName, 'a',
                                                              logfileRotateSize,
                                                              logfileCount),
                                'level': logLevel,
                                'formatter': 'default',
                                'filters': logFilters}

    logConfig = {
        'version': 1,
        'filters': {
            'safelogEmail': {'()': safelog.SafelogEmailFilter},
            'safelogIPv4': {'()': safelog.SafelogIPv4Filter},
            'safelogIPv6': {'()': safelog.SafelogIPv6Filter},
        },
        'formatters': {
            'default': {'()': JustifiedLogFormatter,
                        # These values below are kwargs passed to
                        # :class:`JustifiedLogFormatter`:
                        'logThreads': logThreads,
                        'logTrace': logTrace,
                        'datefmt': logTimeFormat},
        },
        'handlers': handlers,
        'root': {
            'handlers': _getLogHandlers(bool(logfileName), logStderr),
            'level': logLevel,
        },
    }

    logging.config.dictConfig(logConfig)

    logging.info("Logger Started.")
    logging.info("Level: %s", logLevel)
    logging.info("Safe Logging: %sabled" % ("En" if safelogging else "Dis"))
def deleteFilesOlderThan(files, seconds):
    """Unlink every file in ``files`` which was last modified more than
    ``seconds`` ago.

    :param list files: Paths of the files which are candidates for deletion.
    :param int seconds: The maximum permitted age, in seconds, of a file's
        mtime.  Files older than this are deleted.
    :rtype: list
    :returns: The paths which were deleted, in the order they were given.
    """
    removed = []
    # A single timestamp (truncated to whole seconds) is used for every file.
    timestamp = int(time.time())

    for path in files:
        age = timestamp - os.stat(path).st_mtime
        if not (age > seconds):
            continue
        os.unlink(path)
        removed.append(path)

    return removed
def levenshteinDistance(s1, s2, len1=None, len2=None,
                        offset1=0, offset2=0, memo=None):
    """Compute the Levenshtein Distance between two strings.

    The `Levenshtein String Distance Algorithm
    <https://en.wikipedia.org/wiki/Levenshtein_distance>`_ computes the
    minimum number of single-character insertions, deletions, and
    substitutions which must be made to **s1** to make it identical to
    **s2**.

    >>> from bridgedb.util import levenshteinDistance
    >>> levenshteinDistance('cat', 'cat')
    0
    >>> levenshteinDistance('cat', 'hat')
    1
    >>> levenshteinDistance('arma', 'armadillo')
    5

    The distance is computed iteratively, one row of the edit-distance table
    at a time, so long inputs do not exhaust the interpreter's recursion
    limit.

    :param str s1: The string which should be changed.
    :param str s2: The string which **s1** should be compared to.
    :param int len1: The number of characters of **s1**, starting at
        **offset1**, to consider. (default: ``len(s1)``)
    :param int len2: The number of characters of **s2**, starting at
        **offset2**, to consider. (default: ``len(s2)``)
    :param int offset1: The index in **s1** at which to start comparing.
    :param int offset2: The index in **s2** at which to start comparing.
    :param dict memo: An optional cache of previously computed results, keyed
        on ``"offset1,len1,offset2,len2"``.  If it already holds a result for
        the requested window, that result is returned; otherwise the newly
        computed distance is stored in it.
    :raises IndexError: if an offset plus its length reaches past the end of
        the corresponding string.
    :rtype: int
    :returns: The edit distance between the two (sub)strings.
    """
    len1 = len(s1) if len1 is None else len1
    len2 = len(s2) if len2 is None else len2
    memo = {} if memo is None else memo

    key = ','.join([str(offset1), str(len1), str(offset2), str(len2)])
    cached = memo.get(key)
    if cached is not None:
        return cached

    # Turning an empty string into the other one takes one insertion (or
    # deletion) per character.
    if len1 == 0:
        return len2
    if len2 == 0:
        return len1

    # previous[j] holds the distance between the first (i - 1) characters of
    # the s1 window and the first j characters of the s2 window.
    previous = list(range(len2 + 1))
    for i in range(1, len1 + 1):
        ch1 = s1[offset1 + i - 1]
        current = [i]
        for j in range(1, len2 + 1):
            cost = 0 if ch1 == s2[offset2 + j - 1] else 1
            current.append(min(previous[j] + 1,          # deletion
                               current[j - 1] + 1,       # insertion
                               previous[j - 1] + cost))  # substitution
        previous = current

    distance = previous[len2]
    memo[key] = distance
    return distance
def isascii(s):
    r"""Check that every character in **s** is an ASCII character.

    Unlike the ``str.is*`` methods, the empty string is considered to
    pass the check, i.e. ``True`` is returned for it.

    >>> from bridgedb.util import isascii
    >>> isascii('\x80')
    False
    >>> isascii('foo\tbar\rbaz\n')
    True
    >>> isascii('foo bar')
    True

    :param str s: The string to check for non-ASCII characters.
    :rtype: bool
    :returns: ``False`` if any character of **s** has a code point of 128 or
        above, ``True`` otherwise.
    """
    for character in s:
        if ord(character) >= 128:
            return False
    return True
def isascii_noncontrol(s):
    r"""Check that every character in **s** is a printable (non-control)
    ASCII character.

    Unlike the ``str.is*`` methods, the empty string is considered to
    pass the check, i.e. ``True`` is returned for it.

    >>> from bridgedb.util import isascii_noncontrol
    >>> isascii_noncontrol('\x80')
    False
    >>> isascii_noncontrol('foo\tbar\rbaz\n')
    False
    >>> isascii_noncontrol('foo bar')
    True

    :param str s: The string to check for non-ASCII or control characters.
    :rtype: bool
    :returns: ``True`` if every character of **s** has a code point from 32
        up to and including 126, ``False`` otherwise.
    """
    for character in s:
        codepoint = ord(character)
        if codepoint < 32 or codepoint >= 127:
            return False
    return True
def replaceControlChars(text, replacement=None, encoding="utf-8"):
    r"""Remove or replace ASCII control characters [0-31, 92, 127].

    Note that the backslash (92) is treated like a control character.

    >>> from bridgedb.util import replaceControlChars
    >>> replaceControlChars('foo\n bar\\ baz\r \t\0quux\n')
    'foo bar baz quux'
    >>> replaceControlChars("\bI wonder if I'm outside the quotes now")
    "I wonder if I'm outside the quotes now"
    >>> replaceControlChars('foo\nbar', 95)
    'foo_bar'

    :param str text: Some text to remove ASCII control characters from.  If
        given as ``bytes``, it is first decoded with **encoding**.
    :param int replacement: If given, the **replacement** should be an integer
        representing the decimal representation of the byte to replace
        occurrences of ASCII control characters with. For example, if they
        should be replaced with the character ``'a'``, then ``97`` should be
        used as the **replacement**, because ``ord('a') == 97``.  A string
        is also accepted and is inserted as-is.  If ``None`` (the default),
        the control characters are simply removed.
    :param str encoding: The encoding of the **text**.
    :rtype: str
    :returns: The sanitized **text**.
    """
    if replacement is None:
        substitute = ''
    elif isinstance(replacement, int):
        substitute = chr(replacement)
    elif isinstance(replacement, bytes):
        substitute = replacement.decode(encoding)
    else:
        substitute = replacement

    text = text.decode(encoding) if isinstance(text, bytes) else text
    # The following replaces characters 0-31, 92, and 127.  A function is
    # used for the substitution so that the replacement text is inserted
    # literally, without backslash escapes in it being interpreted.
    return re.sub(r'[\x00-\x1f\x5c\x7f]', lambda _match: substitute, text)
def registerAdapter(adapter, adapted, interface):
    """Globally register a Zope interface adapter, tolerating the case where
    an adapter has already been registered.

    The arguments are passed through, unchanged, to
    :api:`twisted.python.components.registerAdapter`.  See also the Twisted
    Matrix Labs `howto documentation for components`_.

    .. _howto documentation for components:
        https://twistedmatrix.com/documents/current/core/howto/components.html

    :param adapter: Passed as the first argument to Twisted's
        ``registerAdapter``.
    :param adapted: Passed as the second argument to Twisted's
        ``registerAdapter``.
    :param interface: Passed as the third argument to Twisted's
        ``registerAdapter``.
    """
    try:
        components.registerAdapter(adapter, adapted, interface)
    except ValueError:
        # An adapter class was already registered; keep the existing one.
        return None
    return None
class JustifiedLogFormatter(logging.Formatter):
    """A logging formatter which pretty prints thread and calling function
    information, in addition to the normal timestamp, log level, and log
    message.

    :ivar int width: The width of the column for the calling function
        information, if the latter is to be included.
    """

    width = 30

    def __init__(self, logThreads=False, logTrace=False,
                 datefmt="%H:%M:%S"):
        """If **logTrace** is ``True``, the line number, module name, and
        function name where the logger was called will be included in the
        message, and the width of this information will always equal ``width``.

        :param bool logThreads: If ``True``, include the current thread name
            and ID in formatted log messages.
        :param bool logTrace: If ``True``, include information on the calling
            function in formatted log messages.
        :param str datefmt: The :func:`time.strftime` format used for the
            timestamp of each message. (default: ``"%H:%M:%S"``)
        """
        self.logThreads = logThreads
        self.logTrace = logTrace

        _fmt = ["%(asctime)s %(levelname)-7.7s"]
        if self.logThreads:
            _fmt.append("[%(threadName)s id:%(thread)d]")
        # ``callingFunc`` is not a standard LogRecord attribute; it is set
        # on every record by _formatCallingFuncName() before formatting.
        _fmt.append("%(callingFunc)s")
        _fmt.append("%(message)s")

        super(JustifiedLogFormatter, self).__init__(fmt=" ".join(_fmt),
                                                    datefmt=datefmt)

    def _formatCallingFuncName(self, record):
        """Format the combined module name and function name of the place where
        the log message/record was recorded, so that the formatted string is
        left-justified and not longer than the :cvar:`width`.

        :type record: :class:`logging.LogRecord`
        :param record: A record of an event created by calling a logger.
        :returns: The :class:`logging.LogRecord` with a ``callingFunc``
            attribute added.  If **logTrace** is enabled, the attribute
            contains the line number, module, and function name, truncated
            to ``width`` or padded on the right with spaces as is necessary;
            otherwise it is the empty string.
        """
        callingFunc = ""
        if self.logTrace:
            # The '.' character between the module name and function name
            # would otherwise be interpreted as a format string specifier, so
            # we must specify ``chr(46)``:
            lineno = "L%s:" % record.lineno
            caller = "%s%-s%s" % (lineno.rjust(6), record.module, chr(46))
            # Reserve two characters for the trailing "()".  Never let the
            # limit go negative: a negative slice bound would chop characters
            # off the *end* of the name instead of truncating it.
            maxFuncNameWidth = max(0, self.width - 2 - len(caller))
            funcName = (record.funcName or "")[:maxFuncNameWidth]
            caller += "%s()" % (funcName)
            # Pad short entries and cut overlong ones (e.g. very long module
            # names), so that the column is always exactly ``width`` wide.
            callingFunc = caller.ljust(self.width)[:self.width]

        record.callingFunc = callingFunc
        return record

    def format(self, record):
        """Reformat this log **record** to neatly print thread and function
        traces, if configured to do so.

        :type record: :class:`logging.LogRecord`
        :param record: A record of an event created by calling a logger.
        :rtype: str
        :returns: The formatted log message.
        """
        record = self._formatCallingFuncName(record)
        return super(JustifiedLogFormatter, self).format(record)
361 class mixin(metaclass=abc.ABCMeta):
362 """Subclasses of me can be used as a mixin class by registering another
363 class, ``ClassA``, which should be mixed with the ``mixin`` subclass, in
364 order to provide simple, less error-prone, multiple inheritance models::
366 >>> from __future__ import print_function
367 >>> from bridgedb.util import mixin
369 >>> class ClassA(object):
370 ... def sayWhich(self):
371 ... print("ClassA.sayWhich() called.")
372 ... def doSuperThing(self):
373 ... print("%s" % super(ClassA, self))
374 ... def doThing(self):
375 ... print("ClassA is doing a thing.")
377 >>> class ClassB(ClassA):
378 ... def sayWhich(self):
379 ... print("ClassB.sayWhich() called.")
380 ... def doSuperThing(self):
381 ... print("%s" % super(ClassB, self))
382 ... def doOtherThing(self):
383 ... print("ClassB is doing something else.")
385 >>> class ClassM(mixin):
386 ... def sayWhich(self):
387 ... print("ClassM.sayWhich() called.")
389 >>> ClassM.register(ClassA)
391 >>> class ClassC(ClassM, ClassB):
392 ... def sayWhich(self):
393 ... super(ClassC, self).sayWhich()
395 >>> c = ClassC()
396 >>> c.sayWhich()
397 ClassM.sayWhich() called.
398 >>> c.doSuperThing()
399 <super: <class 'ClassB'>, <ClassC object>>
400 >>> c.doThing()
401 ClassA is doing a thing.
402 >>> c.doOtherThing()
403 ClassB is doing something else.
405 .. info:: This class' name is lowercased because pylint is hardcoded to
406 expect mixin classes to end in ``'mixin'``.