1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_safelog -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
6 # :copyright: (c) 2013-2017, Isis Lovecruft
7 # (c) 2007-2017, The Tor Project, Inc.
8 # :license: 3-Clause BSD, see LICENSE for licensing information
10 """Filters for log sanitisation.
12 .. inheritance-diagram:: BaseSafelogFilter SafelogEmailFilter SafelogIPv4Filter SafelogIPv6Filter
15 The ``Safelog*Filter`` classes within this module can be instantiated and
16 added to any :class:`logging.Handler`, in order to transparently filter
17 substrings within log messages which match the given ``pattern``. Matching
18 substrings may be optionally additionally validated by implementing the
19 :meth:`~BaseSafelogFilter.doubleCheck` method before they are finally replaced
20 with the ``replacement`` string. For example::
24 >>> from bridgedb import safelog
25 >>> handler = logging.StreamHandler(io.BytesIO())
26 >>> logger = logging.getLogger()
27 >>> logger.addHandler(handler)
28 >>> logger.addFilter(safelog.SafelogEmailFilter())
29 >>> logger.info("Sent response email to: blackhole@torproject.org")
39 |_ setSafeLogging - Enable or disable safelogging globally.
40 |_ logSafely - Utility for manually sanitising a portion of a log message
42 \_ BaseSafelogFilter - Base class for log message sanitisation filters
43 | |_ doubleCheck - Optional stricter validation on matching substrings
44 | \_ filter - Determine if some part of a log message should be filtered
46 |_ SafelogEmailFilter - Filter for removing email addresses from logs
47 |_ SafelogIPv4Filter - Filter for removing IPv4 addresses from logs
48 |_ SafelogIPv6Filter - Filter for removing IPv6 addresses from logs
57 from bridgedb
.parse
import addr
63 def setSafeLogging(safe
):
64 """Enable or disable automatic filtering of log messages.
66 :param bool safe: If ``True``, filter email and IP addresses from log
67 messages automagically.
72 def logSafely(string
):
73 """Utility for manually sanitising a portion of a log message.
75 :param str string: If ``SAFELOGGING`` is enabled, sanitise this **string**
76 by replacing it with ``"[scrubbed]"``. Otherwise, return the
79 :returns: ``"[scrubbed]"`` or the original string.
class BaseSafelogFilter(logging.Filter):
    """Base class for creating log message sanitisation filters.

    A :class:`BaseSafelogFilter` uses a compiled regex :attr:`pattern` to
    match particular items of data in log messages which should be sanitised
    (if ``SAFELOGGING`` is enabled in :file:`bridgedb.conf`).

    .. note::
        The :attr:`pattern` is used only for string *matching* purposes, and
        *not* for validation. In other words, a :attr:`pattern` which matches
        email addresses should simply match something which appears to be an
        email address, even though that matching string might not technically
        be a valid email address vis-à-vis :rfc:`5321`.

    In addition, a ``BaseSafelogFilter`` uses an :attr:`easyFind`, which is
    simply a string or character to search for before checking against the
    regular expression, to attempt to avoid regexing *everything* which
    passes through the logger.

    :cvar pattern: A compiled regular expression, whose matches will be
        scrubbed from log messages and replaced with :attr:`replacement`.
    :vartype easyFind: str
    :cvar easyFind: A simpler string to search for before attempting to match
        by regex.
    :vartype replacement: str
    :cvar replacement: The string to replace ``pattern`` matches
        with. (default: ``"[scrubbed]"``)
    """

    pattern = re.compile("FILTERME")
    easyFind = "FILTERME"
    replacement = "[scrubbed]"

    def doubleCheck(self, match):
        """Subclasses should override this function to implement any additional
        substring filtering to decrease the false positive rate, i.e. any
        additional filtering or validation which is *more* costly than
        checking against the regular expression, :attr:`pattern`.

        To use only the :attr:`pattern` matching in :meth:`filter`, and not
        use this method, simply do::

            return True

        :param str match: Some portion of the :ivar:`logging.LogRecord.msg`
            string which has already passed the checks in :meth:`filter`, for
            which additional validation/checking is required.
        :rtype: bool
        :returns: ``True`` if the additional validation passes (in other
            words, the **match** *should* be filtered), and ``None`` or
            ``False`` otherwise.
        """
        # The base class performs no extra validation: every regex match is
        # scrubbed.
        return True

    def filter(self, record):
        """Filter a log record.

        The log **record** is filtered, and thus sanitised by replacing
        matching substrings with the :attr:`replacement` string, if the
        following checks pass:

        1. ``SAFELOGGING`` is currently enabled.
        2. The ``record.msg`` string contains :attr:`easyFind`.
        3. The ``record.msg`` matches the regular expression, :attr:`pattern`.

        Only ``record.msg`` is examined; ``record.args`` are not inspected.

        :type record: :class:`logging.LogRecord`
        :param record: Basically, anything passed to :func:`logging.log`.
        :rtype: :class:`logging.LogRecord`
        :returns: The (possibly sanitised) **record**. It is truthy, so the
            record is never suppressed by this filter.
        """
        # NOTE(review): ``safe_logging`` is assumed to be the module-level
        # flag toggled by setSafeLogging(); its definition, and the original
        # write-back/return statements, are not visible in this excerpt --
        # confirm against the full module.
        if not safe_logging:
            return record

        msg = str(record.msg)
        # Membership test rather than ``find() > 0``: ``find`` returns 0 when
        # the marker is the very first character, which must still be scrubbed.
        if self.easyFind in msg:
            for match in self.pattern.findall(msg):
                if self.doubleCheck(match):
                    msg = msg.replace(match, self.replacement)
            # Store the sanitised text back on the record so that handlers
            # emit the scrubbed message.
            record.msg = msg
        return record
class SafelogEmailFilter(BaseSafelogFilter):
    """A log filter which removes email addresses from log messages."""

    # NOTE(review): the class ``[.-a-zA-Z0-9]`` contains the range ``.-a``
    # (every character from "." through "a", e.g. "/", ":", "@", "_"), which
    # is probably not what was intended. It is left byte-for-byte unchanged
    # here because narrowing it would change which substrings get scrubbed.
    pattern = re.compile(
        "([a-zA-Z0-9]+[.+a-zA-Z0-9]*[@]{1}[a-zA-Z0-9]+[.-a-zA-Z0-9]*[.]{1}[a-zA-Z]+)")
    # Anything matched by ``pattern`` contains "@", so it is a sound cheap
    # pre-check. Without this override the inherited "FILTERME" marker would
    # be required, and ordinary email addresses would never be scrubbed.
    easyFind = "@"

    # Explicit override delegating to the base implementation;
    # ``functools.wraps`` copies the base method's name and docstring onto it
    # (presumably so the method is documented on this class).
    @functools.wraps(BaseSafelogFilter.filter)
    def filter(self, record):
        return BaseSafelogFilter.filter(self, record)
class SafelogIPv4Filter(BaseSafelogFilter):
    """A log filter which removes IPv4 addresses from log messages."""

    # Raw string: ``\d`` and ``\.`` are regex escapes, not string escapes.
    # The compiled pattern is unchanged.
    pattern = re.compile(r"(?:\d{1,3}\.?){4}")
    # A dotted-quad address always contains ".", so it is a sound cheap
    # pre-check. Without this override the inherited "FILTERME" marker would
    # be required, and addresses would never be scrubbed.
    easyFind = "."

    def doubleCheck(self, match):
        """Additional check to ensure that **match** is an IPv4 address.

        The :attr:`pattern` is deliberately loose (the dots are optional), so
        each candidate is validated with :func:`addr.isIPv4` before it is
        scrubbed.

        :param str match: A substring which matched :attr:`pattern`.
        :rtype: bool
        :returns: ``True`` if **match** should be scrubbed, ``False``
            otherwise.
        """
        if addr.isIPv4(match):
            return True
        return False

    # Explicit override delegating to the base implementation;
    # ``functools.wraps`` copies the base method's name and docstring onto it
    # (presumably so the method is documented on this class).
    @functools.wraps(BaseSafelogFilter.filter)
    def filter(self, record):
        return BaseSafelogFilter.filter(self, record)
class SafelogIPv6Filter(BaseSafelogFilter):
    """A log filter which removes IPv6 addresses from log messages."""

    pattern = re.compile("([:]?[a-fA-F0-9:]+[:]+[a-fA-F0-9:]+){1,8}")
    # Anything matched by ``pattern`` contains ":", so it is a sound cheap
    # pre-check. Without this override the inherited "FILTERME" marker would
    # be required, and addresses would never be scrubbed.
    easyFind = ":"

    def doubleCheck(self, match):
        """Additional check to ensure that **match** is an IPv6 address.

        The :attr:`pattern` also matches other colon-separated hex-looking
        text, so each candidate is validated with :func:`addr.isIPv6` before
        it is scrubbed.

        :param str match: A substring which matched :attr:`pattern`.
        :rtype: bool
        :returns: ``True`` if **match** should be scrubbed, ``False``
            otherwise.
        """
        if addr.isIPv6(match):
            return True
        return False

    # Explicit override delegating to the base implementation;
    # ``functools.wraps`` copies the base method's name and docstring onto it
    # (presumably so the method is documented on this class).
    @functools.wraps(BaseSafelogFilter.filter)
    def filter(self, record):
        return BaseSafelogFilter.filter(self, record)