1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_util -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
6 # Matthew Finkel 0x017DD169EA793BE2 <sysrqb@torproject.org>
7 # :copyright: (c) 2013-2017, Isis Lovecruft
8 # (c) 2013-2017, Matthew Finkel
9 # (c) 2007-2017, The Tor Project, Inc.
10 # :license: 3-Clause BSD, see LICENSE for licensing information
12 """Common utilities for BridgeDB."""
14 from functools
import partial
19 import logging
.handlers
24 from twisted
.python
import components
27 def _getLogHandlers(logToFile
=True, logToStderr
=True):
28 """Get the appropriate list of log handlers.
30 :param bool logToFile: If ``True``, add a logfile handler.
31 :param bool logToStderr: If ``True``, add a stream handler to stderr.
33 :returns: A list containing the appropriate log handler names from the
34 :class:`logging.config.dictConfigClass`.
38 logHandlers
.append('rotating')
40 logHandlers
.append('console')
43 def _getRotatingFileHandler(filename
, mode
='a', maxBytes
=1000000, backupCount
=0,
44 encoding
='utf-8', uid
=None, gid
=None):
45 """Get a :class:`logging.RotatingFileHandler` with a logfile which is
46 readable+writable only by the given **uid** and **gid**.
48 :param str filename: The full path to the log file.
49 :param str mode: The mode to open **filename** with. (default: ``'a'``)
50 :param int maxBytes: Rotate logfiles after they have grown to this size in
52 :param int backupCount: The number of logfiles to keep in rotation.
53 :param str encoding: The encoding for the logfile.
54 :param int uid: The owner UID to set on the logfile.
55 :param int gid: The GID to set on the logfile.
56 :rtype: :class:`logging.handlers.RotatingFileHandler`
57 :returns: A logfile handler which will rotate files and chown/chmod newly
60 # Default to the current process owner's uid and gid:
61 uid
= os
.getuid() if not uid
else uid
62 gid
= os
.getgid() if not gid
else gid
64 if not os
.path
.exists(filename
):
65 open(filename
, 'a').close()
66 os
.chown(filename
, uid
, gid
)
68 os
.chmod(filename
, os
.ST_WRITE | os
.ST_APPEND
)
69 except AttributeError: # pragma: no cover
71 XXX FIXME: Travis chokes on `os.ST_WRITE` saying that the module doesn't
72 have that attribute, for some reason:
73 https://travis-ci.org/isislovecruft/bridgedb/builds/24145963#L1601""")
74 os
.chmod(filename
, 384)
76 fileHandler
= partial(logging
.handlers
.RotatingFileHandler
,
80 backupCount
=backupCount
,
84 def configureLogging(cfg
):
85 """Set up Python's logging subsystem based on the configuration.
87 :type cfg: :class:`~bridgedb.persistent.Conf`
88 :param cfg: The current configuration, including any in-memory settings.
90 from bridgedb
import safelog
92 # Turn on safe logging by default:
93 safelogging
= getattr(cfg
, 'SAFELOGGING', True)
94 safelog
.setSafeLogging(safelogging
)
96 level
= getattr(cfg
, 'LOGLEVEL', 'WARNING')
97 logLevel
= getattr(logging
, level
, 0)
98 logStderr
= getattr(cfg
, 'LOG_TO_STDERR', False)
99 logfileName
= getattr(cfg
, 'LOGFILE', "bridgedb.log")
100 logfileCount
= getattr(cfg
, 'LOGFILE_COUNT', 3) - 1
101 logfileRotateSize
= getattr(cfg
, 'LOGFILE_ROTATE_SIZE', 10000000)
102 logThreads
= getattr(cfg
, 'LOG_THREADS', False)
103 logTrace
= getattr(cfg
, 'LOG_TRACE', False)
104 logTimeFormat
= getattr(cfg
, 'LOG_TIME_FORMAT', "%H:%M:%S")
108 logFilters
= ['safelogEmail', 'safelogIPv4', 'safelogIPv6']
113 'safelogEmail': {'()': safelog
.SafelogEmailFilter
},
114 'safelogIPv4': {'()': safelog
.SafelogIPv4Filter
},
115 'safelogIPv6': {'()': safelog
.SafelogIPv6Filter
},
118 'default': {'()': JustifiedLogFormatter
,
119 # These values below are kwargs passed to
120 # :class:`JustifiedFormatter`:
121 'logThreads': logThreads
,
122 'logTrace': logTrace
,
123 'datefmt': logTimeFormat
},
126 'console': {'class': 'logging.StreamHandler',
128 'formatter': 'default',
129 'filters': logFilters
},
130 'rotating': {'()': _getRotatingFileHandler(logfileName
, 'a',
134 'formatter': 'default',
135 'filters': logFilters
},
138 'handlers': _getLogHandlers(logfileName
, logStderr
),
143 logging
.config
.dictConfig(logConfig
)
145 logging
.info("Logger Started.")
146 logging
.info("Level: %s", logLevel
)
147 logging
.info("Safe Logging: %sabled" % ("En" if safelogging
else "Dis"))
def deleteFilesOlderThan(files, seconds):
    """Delete any file in ``files`` with an mtime more than ``seconds`` ago.

    :param list files: A list of paths to files which should be
        considered for deletion.
    :param int seconds: If a file's mtime is more than this number (in
        seconds), it will be deleted.
    :rtype: list
    :returns: A list of the deleted files.
    """
    deleted = []
    # Take the timestamp once so every file is judged against the same "now".
    now = int(time.time())

    for fn in files:
        if (now - os.stat(fn).st_mtime) > seconds:
            os.unlink(fn)
            deleted.append(fn)

    return deleted
def levenshteinDistance(s1, s2, len1=None, len2=None,
                        offset1=0, offset2=0, memo=None):
    """Compute the Levenshtein Distance between two strings.

    The `Levenshtein String Distance Algorithm
    <https://en.wikipedia.org/wiki/Levenshtein_distance>`_ efficiently
    computes the number of characters which must be changed in **s1** to make
    it identical to **s2**.

    >>> from bridgedb.util import levenshteinDistance
    >>> levenshteinDistance('cat', 'cat')
    0
    >>> levenshteinDistance('cat', 'hat')
    1
    >>> levenshteinDistance('arma', 'armadillo')
    5

    :param str s1: The string which should be changed.
    :param str s2: The string which **s1** should be compared to.
    :param int len1: The number of characters of **s1** to consider, starting
        at **offset1**. (default: ``len(s1)``)
    :param int len2: The number of characters of **s2** to consider, starting
        at **offset2**. (default: ``len(s2)``)
    :param int offset1: The index in **s1** at which comparison starts.
    :param int offset2: The index in **s2** at which comparison starts.
    :param dict memo: An optional cache, keyed on
        ``"offset1,len1,offset2,len2"``. A cached result is returned if
        present, and the computed distance is stored back into it.
    :rtype: int
    :returns: The number of single-character insertions, deletions, and
        substitutions needed to turn the considered part of **s1** into the
        considered part of **s2**.
    """
    len1 = len(s1) if len1 is None else len1
    len2 = len(s2) if len2 is None else len2
    memo = {} if memo is None else memo

    key = ','.join([str(offset1), str(len1), str(offset2), str(len2)])
    cached = memo.get(key)
    if cached is not None:
        return cached

    if len1 == 0:
        return len2
    if len2 == 0:
        return len1

    # Iterative two-row dynamic programme: ``previous[j]`` is the distance
    # between the first (i - 1) considered characters of s1 and the first j
    # considered characters of s2.  Iterating (rather than recursing) keeps
    # long inputs from exhausting the interpreter's recursion limit.
    previous = list(range(len2 + 1))
    for i in range(1, len1 + 1):
        ch1 = s1[offset1 + i - 1]
        current = [i]
        for j in range(1, len2 + 1):
            cost = 0 if ch1 == s2[offset2 + j - 1] else 1
            current.append(min(previous[j] + 1,          # deletion
                               current[j - 1] + 1,       # insertion
                               previous[j - 1] + cost))  # substitution
        previous = current

    distance = previous[len2]
    memo[key] = distance
    return distance
def isascii(s):
    r"""Return True if there are no non-ASCII characters in s, False otherwise.

    Note that this function differs from the str.is* methods in that
    it returns True for the empty string, rather than False.

    >>> from bridgedb.util import isascii
    >>> isascii('\x80')
    False
    >>> isascii('foo\tbar\rbaz\n')
    True
    >>> isascii('foo bar')
    True

    :param str s: The string to check for non-ASCII characters.
    :rtype: bool
    :returns: ``True`` if every character of **s** has a code point below
        128 (vacuously so for the empty string), ``False`` otherwise.
    """
    return all(ord(ch) < 128 for ch in s)
def isascii_noncontrol(s):
    r"""Return True if there are no non-ASCII or control characters in
    s, False otherwise.

    Note that this function differs from the str.is* methods in that
    it returns True for the empty string, rather than False.

    >>> from bridgedb.util import isascii_noncontrol
    >>> isascii_noncontrol('\x80')
    False
    >>> isascii_noncontrol('foo\tbar\rbaz\n')
    False
    >>> isascii_noncontrol('foo bar')
    True

    :param str s: The string to check for non-ASCII or control characters.
    :rtype: bool
    :returns: ``True`` if every character of **s** has a code point in the
        range 32 to 126 inclusive (vacuously so for the empty string),
        ``False`` otherwise.
    """
    return all(32 <= ord(ch) < 127 for ch in s)
def replaceControlChars(text, replacement=None, encoding="utf-8"):
    r"""Remove ASCII control characters [0-31, 92, 127].

    >>> from bridgedb.util import replaceControlChars
    >>> replaceControlChars('foo\n bar\\ baz\r \t\0quux\n')
    'foo bar baz quux'
    >>> replaceControlChars("\bI wonder if I'm outside the quotes now")
    "I wonder if I'm outside the quotes now"
    >>> replaceControlChars('foo\nbar', replacement=95)
    'foo_bar'

    :param str text: Some text to remove ASCII control characters from.
        If given as ``bytes``, it is first decoded using **encoding**.
    :param int replacement: If given, the **replacement** should be an integer
        representing the decimal representation of the byte to replace
        occurrences of ASCII control characters with. For example, if they
        should be replaced with the character ``'a'``, then ``97`` should be
        used as the **replacement**, because ``ord('a') == 97``. If ``None``
        (the default), the control characters are simply removed.
    :param str encoding: The encoding of the **text**.
    :rtype: str
    :returns: The sanitized **text**.
    """
    if replacement is None:
        substitute = ''
    elif isinstance(replacement, int):
        substitute = chr(replacement)
    elif isinstance(replacement, bytes):
        substitute = replacement.decode(encoding)
    else:
        substitute = replacement

    text = text.decode(encoding) if isinstance(text, bytes) else text

    # The following replaces characters 0-31, 92, and 127.  A callable is
    # used for the substitution so that the replacement is inserted
    # literally: a plain string would have its backslashes interpreted as
    # escapes by ``re.sub`` (e.g. ``replacement=92``).
    return re.sub(r'[\x00-\x1f\x5c\x7f]', lambda match: substitute, text)
def registerAdapter(adapter, adapted, interface):
    """Register a Zope interface adapter for global use.

    See :api:`twisted.python.components.registerAdapter` and the Twisted
    Matrix Labs `howto documentation for components`_.

    .. _howto documentation for components:
        https://twistedmatrix.com/documents/current/core/howto/components.html

    :param adapter: Passed through unchanged as the first argument to
        :api:`twisted.python.components.registerAdapter`.
    :param adapted: Passed through unchanged as the second argument.
    :param interface: Passed through unchanged as the third argument.
    """
    try:
        components.registerAdapter(adapter, adapted, interface)
    except ValueError:  # An adapter class was already registered
        # Deliberately best-effort: a duplicate registration is not an error
        # for callers of this helper.
        pass
289 class JustifiedLogFormatter(logging
.Formatter
):
290 """A logging formatter which pretty prints thread and calling function
291 information, in addition to the normal timestamp, log level, and log
294 :ivar int width: The width of the column for the calling function
295 information, if the latter is to be included.
299 def __init__(self
, logThreads
=False, logTrace
=False,
301 """If **logTrace** is ``True``, the line number, module name, and
302 function name where the logger was called will be included in the
303 message, and the width of this information will always equal ``width``.
305 :param bool logThreads: If ``True``, include the current thread name
306 and ID in formatted log messages.
307 :param bool logTrace: If ``True``, include information on the calling
308 function in formatted log messages.
310 self
.logThreads
= logThreads
311 self
.logTrace
= logTrace
313 _fmt
= ["%(asctime)s %(levelname)-7.7s"]
315 _fmt
.append("[%(threadName)s id:%(thread)d]")
316 _fmt
.append("%(callingFunc)s")
317 _fmt
.append("%(message)s")
319 super(JustifiedLogFormatter
, self
).__init
__(fmt
= " ".join(_fmt
), datefmt
=datefmt
)
321 def _formatCallingFuncName(self
, record
):
322 """Format the combined module name and function name of the place where
323 the log message/record was recorded, so that the formatted string is
324 left-justified and not longer than the :cvar:`width`.
326 :type record: :class:`logging.LogRecord`
327 :param record: A record of an event created by calling a logger.
328 :returns: The :class:`logging.LogRecord` with its ``message``
329 attribute rewritten to contain the module and function name,
330 truncated to ``width``, or padded on the right with spaces as is
335 # The '.' character between the module name and function name
336 # would otherwise be interpreted as a format string specifier, so
337 # we must specify ``chr(46)``:
338 lineno
= "L%s:" % record
.lineno
339 caller
= "%s%-s%s" % (lineno
.rjust(6), record
.module
, chr(46))
340 maxFuncNameWidth
= self
.width
- 2 - len(caller
)
341 funcName
= record
.funcName
342 if len(funcName
) > maxFuncNameWidth
:
343 funcName
= record
.funcName
[:maxFuncNameWidth
]
344 caller
+= "%s()" % (funcName
)
345 callingFunc
= caller
.ljust(self
.width
)
347 record
.callingFunc
= callingFunc
350 def format(self
, record
):
351 """Reformat this log **record** to neatly print thread and function
352 traces, if configured to do so.
354 :type record: :class:`logging.LogRecord`
355 :param record: A record of an event created by calling a logger.
357 record
= self
._formatCallingFuncName
(record
)
358 return super(JustifiedLogFormatter
, self
).format(record
)
361 class mixin(metaclass
=abc
.ABCMeta
):
362 """Subclasses of me can be used as a mixin class by registering another
363 class, ``ClassA``, which should be mixed with the ``mixin`` subclass, in
364 order to provide simple, less error-prone, multiple inheritance models::
366 >>> from __future__ import print_function
367 >>> from bridgedb.util import mixin
369 >>> class ClassA(object):
370 ... def sayWhich(self):
371 ... print("ClassA.sayWhich() called.")
372 ... def doSuperThing(self):
373 ... print("%s" % super(ClassA, self))
374 ... def doThing(self):
375 ... print("ClassA is doing a thing.")
377 >>> class ClassB(ClassA):
378 ... def sayWhich(self):
379 ... print("ClassB.sayWhich() called.")
380 ... def doSuperThing(self):
381 ... print("%s" % super(ClassB, self))
382 ... def doOtherThing(self):
383 ... print("ClassB is doing something else.")
385 >>> class ClassM(mixin):
386 ... def sayWhich(self):
387 ... print("ClassM.sayWhich() called.")
389 >>> ClassM.register(ClassA)
391 >>> class ClassC(ClassM, ClassB):
392 ... def sayWhich(self):
393 ... super(ClassC, self).sayWhich()
397 ClassM.sayWhich() called.
399 <super: <class 'ClassB'>, <ClassC object>>
401 ClassA is doing a thing.
403 ClassB is doing something else.
405 .. info:: This class' name is lowercased because pylint is hardcoded to
406 expect mixin classes to end in ``'mixin'``.