Make getCaptchaImage return (bytes, str).
[tor-bridgedb.git] / bridgedb / safelog.py
blobdb8e296d441d8071b4167361d5127b07424198c1
1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_safelog -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
6 # :copyright: (c) 2013-2017, Isis Lovecruft
7 # (c) 2007-2017, The Tor Project, Inc.
8 # :license: 3-Clause BSD, see LICENSE for licensing information
10 """Filters for log sanitisation.
12 .. inheritance-diagram:: BaseSafelogFilter SafelogEmailFilter SafelogIPv4Filter SafelogIPv6Filter
13 :parts: 1
15 The ``Safelog*Filter`` classes within this module can be instantiated and
16 added to any :class:`logging.Handler`, in order to transparently filter
17 substrings within log messages which match the given ``pattern``. Matching
18 substrings may be optionally additionally validated by implementing the
19 :meth:`~BaseSafelogFilter.doubleCheck` method before they are finally replaced
20 with the ``replacement`` string. For example::
22 >>> import io
23 >>> import logging
24 >>> from bridgedb import safelog
25 >>> handler = logging.StreamHandler(io.BytesIO())
26 >>> logger = logging.getLogger()
27 >>> logger.addHandler(handler)
28 >>> logger.addFilter(safelog.SafelogEmailFilter())
29 >>> logger.info("Sent response email to: blackhole@torproject.org")
33 **Module Overview:**
37 bridgedb.safelog
39 |_ setSafeLogging - Enable or disable safelogging globally.
40 |_ logSafely - Utility for manually sanitising a portion of a log message
42 \_ BaseSafelogFilter - Base class for log message sanitisation filters
43 | |_ doubleCheck - Optional stricter validation on matching substrings
44 | \_ filter - Determine if some part of a log message should be filtered
46 |_ SafelogEmailFilter - Filter for removing email addresses from logs
47 |_ SafelogIPv4Filter - Filter for removing IPv4 addresses from logs
48 |_ SafelogIPv6Filter - Filter for removing IPv6 addresses from logs
51 """
53 import functools
54 import logging
55 import re
57 from bridgedb.parse import addr
# Module-wide switch for log sanitisation.  It is toggled via
# setSafeLogging() and read by logSafely() and BaseSafelogFilter.filter();
# sanitisation is on by default.
safe_logging = True
def setSafeLogging(safe):
    """Switch the module-wide log sanitisation on or off.

    :param bool safe: When ``True``, email and IP addresses are scrubbed
        from log messages automagically; when ``False``, log messages are
        left as they are.
    """
    # Rebind the module-level flag (not a local) that the filters consult.
    global safe_logging
    safe_logging = safe
def logSafely(string):
    """Manually sanitise one piece of a log message.

    :param str string: The sensitive text which is about to be logged.
    :rtype: str
    :returns: ``"[scrubbed]"`` while ``SAFELOGGING`` is enabled; otherwise
        the **string**, exactly as it was given.
    """
    return "[scrubbed]" if safe_logging else string
class BaseSafelogFilter(logging.Filter):
    """Base class for creating log message sanitisation filters.

    A :class:`BaseSafelogFilter` uses a compiled regex :attr:`pattern` to
    match particular items of data in log messages which should be sanitised
    (if ``SAFELOGGING`` is enabled in :file:`bridgedb.conf`).

    .. note::
        The :attr:`pattern` is used only for string *matching* purposes, and
        *not* for validation. In other words, a :attr:`pattern` which matches
        email addresses should simply match something which appears to be an
        email address, even though that matching string might not technically
        be a valid email address vis-à-vis :rfc:`5321`.

    In addition, a ``BaseSafelogFilter`` uses an :attr:`easyFind`, which is
    simply a string or character to search for before checking against the
    regular expression, to attempt to avoid regexing *everything* which
    passes through the logger.

    :cvar pattern: A compiled regular expression, whose matches will be
        scrubbed from log messages and replaced with :attr:`replacement`.
    :vartype easyFind: str
    :cvar easyFind: A simpler string to search for before matching by regex.
    :vartype replacement: str
    :cvar replacement: The string to replace ``pattern`` matches
        with. (default: ``"[scrubbed]"``)
    """

    pattern = re.compile("FILTERME")
    easyFind = "FILTERME"
    replacement = "[scrubbed]"

    def doubleCheck(self, match):
        """Subclasses should override this function to implement any additional
        substring filtering to decrease the false positive rate, i.e. any
        additional filtering or validation which is *more* costly than
        checking against the regular expression, :attr:`pattern`.

        To use only the :attr:`pattern` matching in :meth:`filter`, and not
        use this method, simply do::

            return True

        :param str match: Some portion of the :attr:`logging.LogRecord.msg`
            string which has already passed the checks in :meth:`filter`, for
            which additional validation/checking is required.
        :rtype: bool
        :returns: ``True`` if the additional validation passes (in other
            words, the **match** *should* be filtered), and ``None`` or
            ``False`` otherwise.
        """
        return True

    def filter(self, record):
        """Filter a log record.

        The log **record** is filtered, and thus sanitised by replacing
        matching substrings with the :attr:`replacement` string, if the
        following checks pass:

          1. ``SAFELOGGING`` is currently enabled.
          2. The ``record.msg`` string contains :attr:`easyFind`.
          3. The ``record.msg`` matches the regular expression, :attr:`pattern`.

        .. note::
            Only ``record.msg`` is inspected; ``record.args`` are not looked
            at by this method.

        :type record: :class:`logging.LogRecord`
        :param record: Basically, anything passed to :func:`logging.log`.
        :returns: The (possibly modified) **record** itself.  It is returned
            on every path, so this filter never drops a record.
        """
        if safe_logging:
            msg = str(record.msg)
            # Cheap substring test before running the regex.  This must be a
            # membership test: ``find() > 0`` would wrongly skip messages in
            # which the marker sits at the very start (index 0).
            if self.easyFind in msg:
                for match in self.pattern.findall(msg):
                    if self.doubleCheck(match):
                        msg = msg.replace(match, self.replacement)
            record.msg = msg
        return record
class SafelogEmailFilter(BaseSafelogFilter):
    """A log filter which removes email addresses from log messages."""

    # The domain-part character class lists the hyphen first so that it is
    # taken literally.  Written as ``[.-a-zA-Z0-9]`` the ``.-a`` portion is
    # parsed as a *range* from "." to "a", which excludes "-" itself and so
    # lets addresses at hyphenated domains (``user@my-domain.org``) slip
    # through unscrubbed.  The underscore is listed explicitly because that
    # accidental range used to admit it.
    pattern = re.compile(
        r"([a-zA-Z0-9]+[.+a-zA-Z0-9]*[@]{1}[a-zA-Z0-9]+[-._a-zA-Z0-9]*[.]{1}[a-zA-Z]+)")
    # Only messages containing an "@" are worth running the regex against.
    easyFind = "@"

    @functools.wraps(BaseSafelogFilter.filter)
    def filter(self, record):
        return BaseSafelogFilter.filter(self, record)
class SafelogIPv4Filter(BaseSafelogFilter):
    """A log filter which removes IPv4 addresses from log messages."""

    # Raw string: ``\d`` and ``\.`` are regex escapes, not string escapes.
    # The pattern is deliberately loose (four groups of one to three digits,
    # each optionally followed by a dot); doubleCheck() weeds out the false
    # positives.
    pattern = re.compile(r"(?:\d{1,3}\.?){4}")
    # Only messages containing a "." are worth running the regex against.
    easyFind = "."

    def doubleCheck(self, match):
        """Additional check to ensure that **match** is an IPv4 address.

        :param str match: A substring which matched :attr:`pattern`.
        :returns: ``True`` if :func:`bridgedb.parse.addr.isIPv4` accepts the
            **match**, and ``None`` otherwise.
        """
        if addr.isIPv4(match):
            return True
        return None

    @functools.wraps(BaseSafelogFilter.filter)
    def filter(self, record):
        return BaseSafelogFilter.filter(self, record)
class SafelogIPv6Filter(BaseSafelogFilter):
    """A log filter which scrubs IPv6 addresses out of log messages."""

    # Only messages containing a ":" are worth running the regex against.
    easyFind = ":"
    # Loose candidate matcher; doubleCheck() decides what really is IPv6.
    pattern = re.compile("([:]?[a-fA-F0-9:]+[:]+[a-fA-F0-9:]+){1,8}")

    def doubleCheck(self, match):
        """Confirm that **match** really is an IPv6 address.

        :param str match: A substring which matched :attr:`pattern`.
        :returns: ``True`` if :func:`bridgedb.parse.addr.isIPv6` accepts the
            **match**, and ``None`` otherwise.
        """
        return True if addr.isIPv6(match) else None

    @functools.wraps(BaseSafelogFilter.filter)
    def filter(self, record):
        return BaseSafelogFilter.filter(self, record)