1 # -*- coding: utf-8 -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
6 # please also see AUTHORS file
7 # :copyright: (c) 2013 Isis Lovecruft
8 # (c) 2007-2013, The Tor Project, Inc.
9 # (c) 2007-2013, all entities within the AUTHORS file
10 # :license: 3-clause BSD, see included LICENSE for information
12 """Utilities for parsing IP and email addresses.
14 .. py:module:: bridgedb.parse.addr
15 :synopsis: Parsers for finding and validating IP addresses, email
16 addresses, and port ranges.
25 | |_ extractEmailAddress() - Validate a :rfc:`2822` email address.
26 | |_ isIPAddress() - Check if an arbitrary string is an IP address.
27 | |_ isIPv4() - Check if an arbitrary string is an IPv4 address.
28 | |_ isIPv6() - Check if an arbitrary string is an IPv6 address.
29 | \_ isValidIP() - Check that an IP address is valid.
31 |_ PortList - A container class for validated port ranges.
36 How address validity is determined
37 ----------------------------------
39 The following terms define addresses which are **not** valid. All other
40 addresses are taken to be valid.
43 Private IP Address Ranges
44 ^^^^^^^^^^^^^^^^^^^^^^^^^
48 These address ranges are reserved by IANA for private intranets, and not
49 routable to the Internet::
50 10.0.0.0 - 10.255.255.255 (10.0.0.0/8)
51 172.16.0.0 - 172.31.255.255 (172.16.0.0/12)
52 192.168.0.0 - 192.168.255.255 (192.168.0.0/16)
53 For additional information, see :rfc:`1918`.
56 Reserved and Special Use Addresses
57 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
62 Current network (only valid as source address). See :rfc:`1122`. An
63 **Unspecified Address** in the context of firewalls means "all addresses
64 of the local machine". In a routing context, it is usually termed the
65 **Default Route**, and it means the default route (to "the rest of" the
66 internet). See :rfc:`1700`.
72 Reserved for loopback and IPC on the localhost. See :rfc:`1122`.
77 Loopback IP addresses (refers to self). See :rfc:`5735`.
79 127.0.0.1 - 127.255.255.254 (127.0.0.0/8)
83 These are the link-local blocks, used for communication between hosts on
84 a single link. See :rfc:`3927`.
90 Reserved for multicast addresses. See :rfc:`3171`.
92 224.0.0.0 - 239.255.255.255 (224.0.0.0/4)
95 Reserved for private networks. See :rfc:`1918`.
96 Some examples include::
102 Reserved (former Class E network). See :rfc:`1700`, :rfc:`3232`, and
103 :rfc:`5735`. The one exception to this rule is the :term:`Limited
104 Broadcast Address`, ``255.255.255.255`` for which packets at the IP
105 layer are not forwarded to the public internet. For example::
106 240.0.0.0 - 255.255.255.255 (240.0.0.0/4)
108 Limited Broadcast Address
109 Limited broadcast address (limited to all other nodes on the LAN). See
110 :rfc:`919`. For IPv4, ``255`` in any part of the IP is reserved for
111 broadcast addressing to the local LAN, e.g.::
115 .. warning:: The :mod:`ipaddr` module (as of version 2.1.10) does not
116 understand the following reserved_ addresses:
117 .. _reserved: https://tools.ietf.org/html/rfc5735#page-4
121 Reserved Address (Protocol Assignments)
122 Reserved for IETF protocol assignments. See :rfc:`5735`.
126 Reserved Address (6to4 Relay Anycast)
127 IPv6 to IPv4 relay. See :rfc:`3068`.
131 Reserved Address (Network Benchmark)
132 Network benchmark tests. See :rfc:`2544`.
136 Reserved Address (TEST-NET-1)
137 Reserved for use in documentation and example code. It is often used in
138 conjunction with domain names ``example.com`` or ``example.net`` in
139 vendor and protocol documentation. See :rfc:`1166`.
143 Reserved Address (TEST-NET-2)
144 TEST-NET-2. See :rfc:`5737`.
148 Reserved Address (TEST-NET-3)
149 TEST-NET-3. See :rfc:`5737`.
160 Similar uses to :term:`Limited Broadcast Address`. For IPv6, everything
161 becomes convoluted_ and complicated_, and then redefined_. See
162 :rfc:`4193`, :rfc:`3879`, and :rfc:`3513`. The
163 :meth:`ipaddr.IPAddress.is_site_local` method *only* checks to see if
164 the address is a **Unique Local Address** vis-à-vis :rfc:`3513` §2.5.6,
171 .. _convoluted: https://en.wikipedia.org/wiki/IPv6_address#Multicast_addresses
172 .. _complicated: https://en.wikipedia.org/wiki/IPv6_address#IPv6_address_scopes
173 .. _redefined: https://en.wikipedia.org/wiki/Unique_local_address
176 from __future__
import absolute_import
177 from __future__
import print_function
178 from __future__
import unicode_literals
186 #: These are the special characters which RFC2822 allows within email addresses:
187 #: ASPECIAL = '!#$%&*+-/=?^_`{|}~' + "\\\'"
188 #: …But these are the only ones we're confident that we can handle correctly:
189 #: ASPECIAL = '-_+/=_~'
191 ACHAR
= r
'[\w%s]' % "".join("\\%s" % c
for c
in ASPECIAL
)
192 DOTATOM
= r
'%s+(?:\.%s+)*' % (ACHAR
, ACHAR
)
193 DOMAIN
= r
'\w+(?:\.\w+)*'
194 ADDRSPEC
= r
'(%s)\@(%s)' % (DOTATOM
, DOMAIN
)
195 # A compiled regexp which matches on any type and amount of whitespace:
196 SPACE_PAT
= re
.compile(r
'\s+')
197 # A compiled regexp which matches RFC2822 email address strings:
198 ADDRSPEC_PAT
= re
.compile(ADDRSPEC
)
class BadEmail(Exception):
    """Exception raised when we get a bad email address.

    :ivar email: The offending object (usually the raw address string) which
        could not be parsed or validated.
    """

    def __init__(self, msg, email):
        """Create a :class:`BadEmail` exception.

        :param str msg: A description of what was wrong with the address.
        :param email: The address (or other object) which was rejected.
        """
        Exception.__init__(self, msg)
        # Keep the rejected address on the exception, so that handlers can
        # inspect or report it without having to re-parse the message.
        self.email = email
class InvalidPort(ValueError):
    """Signals that a supplied port number was not valid."""
class UnsupportedDomain(ValueError):
    """Signals that an email address came from a domain we don't support."""
def canonicalizeEmailDomain(domain, domainmap):
    """Decide if an email was sent from a permitted domain.

    :type domain: str or bytes
    :param domain: The domain portion of an email address to validate. It
        will be checked that it is one of the domains allowed to email
        requests for bridges to the
        :class:`~bridgedb.distributors.email.distributor.EmailDistributor`.
        Bytes are decoded as UTF-8, and the lookup ignores letter case.
    :param dict domainmap: A map of permitted alternate domains (in lowercase)
        to their canonical domain names (in lowercase). This can be configured
        with the ``EMAIL_DOMAIN_MAP`` option in ``bridgedb.conf``, for
        example::

            EMAIL_DOMAIN_MAP = {'mail.google.com': 'gmail.com',
                                'googlemail.com': 'gmail.com'}

    :raises UnsupportedDomain: if the domain portion of the email address is
        not within the map of alternate to canonical allowed domain names.
    :rtype: str
    :returns: The canonical domain name for the email address.
    """
    if isinstance(domain, bytes):
        domain = domain.decode('utf-8')

    # The keys of ``domainmap`` are lowercase and domain names are
    # case-insensitive, so fold the case before looking the domain up.
    # Objects without a ``lower()`` method are looked up as they are.
    try:
        key = domain.lower()
    except AttributeError:
        key = domain

    permitted = None
    try:
        permitted = domainmap.get(key)
    except AttributeError:
        # Pass the object as a logging argument rather than %-formatting it
        # ourselves: eager formatting raises TypeError when it is a tuple.
        logging.debug("Got non-dict for 'domainmap' parameter: %r", domainmap)

    if not permitted:
        raise UnsupportedDomain("Domain not permitted: %s" % (domain,))

    return permitted
def extractEmailAddress(emailaddr):
    """Split an email address into its local part and its domain.

    The address may come, for example, from a ``From:`` or ``Sender:`` email
    header.  It is parsed (according to :rfc:`2822`) with the module's
    ``ADDRSPEC_PAT`` pattern, which only allows the following form::

        LOCAL_PART := DOTATOM
        ADDRSPEC := LOCAL_PART "@" DOMAIN

    In particular, we are disallowing: obs-local-part, obs-domain, comment,
    and obs-FWS.  Other forms exist, but none of the incoming services we
    recognize support them.

    :param emailaddr: An email address to validate.
    :raises BadEmail: if the **emailaddr** couldn't be validated or parsed.
    :rtype: tuple
    :returns: A 2-tuple of the validated email address, containing the mail
        local part and the domain, i.e. ``(localpart, domain)``.
    """
    original = emailaddr

    try:
        collapsed = SPACE_PAT.sub(' ', emailaddr)
    except TypeError as error:
        logging.debug(error)
        raise BadEmail("Can't extract address from object type %r!"
                       % type(original), original)
    addr = collapsed.strip()

    # Only the usual address forms are handled; anything stranger is
    # rejected with BadEmail, which is fine because only people trying to
    # fool us send those.
    if '<' in addr:
        # Start from the *last* '<', so that quoting tricks earlier in the
        # string don't need to be dealt with.
        bracketed = re.search(r'<([^>]*)>', addr[addr.rindex('<'):])
        if bracketed is None:
            raise BadEmail("Couldn't extract address spec", original)
        addr = bracketed.group(1)

    # What remains is a putative addr-spec; drop any spaces left inside it.
    matched = ADDRSPEC_PAT.match(addr.replace(" ", ""))
    if not matched:
        raise BadEmail("Bad address spec format", original)

    localpart, domain = matched.groups()
    return localpart, domain
298 def isIPAddress(ip
, compressed
=True):
299 """Check if an arbitrary string is an IP address, and that it's valid.
302 :param ip: The IP address to check.
303 :param boolean compressed: If True, return a string representing the
304 compressed form of the address. Otherwise, return an
305 :class:`ipaddr.IPAddress` instance.
306 :rtype: A :class:`ipaddr.IPAddress`, or a string, or False
307 :returns: The IP, as a string or a class, if it passed the
308 checks. Otherwise, returns False.
311 ip
= ipaddr
.IPAddress(ip
)
def isIPv(version, ip):
    """Check if **ip** is a certain **version** (IPv4 or IPv6).

    .. warning:: Do *not* put any calls to the logging module in this
        function, or else an infinite recursion will occur when the call is
        made, due to the log :class:`~logging.Filter` instances in
        :mod:`~bridgedb.safelog` using this function to validate matches
        from their regular expressions.

    :param integer version: The IPv[4|6] version to check; should be either
        ``4`` or ``6``.  How any other value is treated is decided by
        :func:`ipaddr.IPAddress`.
    :param ip: The IP address to check. May be any type which
        :class:`ipaddr.IPAddress` will accept.
    :rtype: boolean
    :returns: ``True``, if :func:`ipaddr.IPAddress` accepts **ip** as an
        address of the requested **version**; ``False`` otherwise.
    """
    try:
        ipaddr.IPAddress(ip, version=version)
    except Exception:
        # Deliberately broad: any failure to construct the address means
        # "no".  ``ipaddr.AddressValueError`` used to be listed next to
        # ``Exception`` here, but ``Exception`` already covers it.
        return False
    return True
347 """Check if an address is IPv4.
349 .. attention:: This does *not* check validity. See :func:`isValidIP`.
352 :param ip: The IP address to check.
354 :returns: True if the address is an IPv4 address.
359 """Check if an address is IPv6.
361 .. attention:: This does *not* check validity. See :func:`isValidIP`.
364 :param ip: The IP address to check.
366 :returns: True if the address is an IPv6 address.
371 """Check that an IP (v4 or v6) is valid.
373 The IP address, **ip**, must not be any of the following:
375 * A :term:`Link-Local Address`,
376 * A :term:`Loopback Address` or :term:`Localhost Address`,
377 * A :term:`Multicast Address`,
378 * An :term:`Unspecified Address` or :term:`Default Route`,
379 * Any other :term:`Private Address`, or address within a privately
380 allocated space, such as the IANA-reserved
381 :term:`Shared Address Space`.
383 If it is an IPv6 address, it also must not be:
385 * A :term:`Site-Local Address` or an :term:`Unique Local Address`.
387 >>> from bridgedb.parse.addr import isValidIP
388 >>> isValidIP('1.2.3.4')
390 >>> isValidIP('1.2.3.255')
392 >>> isValidIP('1.2.3.256')
396 >>> isValidIP('1.2.3')
398 >>> isValidIP('xyzzy')
401 :type ip: An :class:`ipaddr.IPAddress`, :class:`ipaddr.IPv4Address`,
402 :class:`ipaddr.IPv6Address`, or str
403 :param ip: An IP address. If it is a string, it will be converted to a
404 :class:`ipaddr.IPAddress`.
406 :returns: ``True``, if **ip** passes the checks; False otherwise.
411 if isinstance(ip
, str):
412 ip
= ipaddr
.IPAddress(ip
)
415 reasons
.append('link local')
417 reasons
.append('loopback')
419 reasons
.append('multicast')
421 reasons
.append('private')
422 if ip
.is_unspecified
:
423 reasons
.append('unspecified')
425 if (ip
.version
== 6) and ip
.is_site_local
:
426 reasons
.append('site local')
427 elif (ip
.version
== 4) and ip
.is_reserved
:
428 reasons
.append('reserved')
430 reasons
.append('cannot convert to ip')
433 explain
= ', '.join([r
for r
in reasons
])
434 logging
.debug("IP address %r is invalid! Reason(s): %s"
440 """Determine if ``ip`` is a loopback or localhost address (127.0.0.1/8).
442 :type ip: ``str`` or ``ipaddr.IPAddress``
443 :param ip: An IP address.
445 :returns: ``True`` if the IP was a loopback or localhost address; ``False``
449 if isinstance(ip
, str):
450 ip
= ipaddr
.IPAddress(ip
)
456 except Exception as err
:
457 logging
.debug("Error attempting to parse IP address: %s", ip
)
def normalizeEmail(emailaddr, domainmap, domainrules, ignorePlus=True):
    """Normalise an email address according to the processing rules for its
    canonical originating domain.

    The email address, **emailaddr**, is lowercased, parsed and validated
    with :func:`extractEmailAddress`, and then checked, via
    :func:`canonicalizeEmailDomain`, to have originated from one of the
    domains allowed to email requests for bridges to the
    :class:`~bridgedb.distributors.email.distributor.EmailDistributor`.

    :param str emailaddr: An email address to normalise.
    :param dict domainmap: A map of permitted alternate domains (in lowercase)
        to their canonical domain names (in lowercase). This can be configured
        with the ``EMAIL_DOMAIN_MAP`` option in ``bridgedb.conf``, for
        example::

            EMAIL_DOMAIN_MAP = {'mail.google.com': 'gmail.com',
                                'googlemail.com': 'gmail.com'}

    :param dict domainrules: A mapping of canonical permitted domain names to
        a list of rules which should be applied to processing them, for
        example::

            EMAIL_DOMAIN_RULES = {'gmail.com': ["ignore_dots", "dkim"]}

        Currently, ``"ignore_dots"`` means that all ``"."`` characters will be
        removed from the local part of the validated email address.
    :param bool ignorePlus: If ``True``, assume that
        ``blackhole+kerr@torproject.org`` is an alias for
        ``blackhole@torproject.org``, and remove everything from the first
        ``'+'`` onwards in the local part.
    :raises UnsupportedDomain: if the email address originated from a domain
        that we do not explicitly support.
    :raises BadEmail: if the email address could not be parsed or validated.
    :rtype: str
    :returns: The validated, normalised email address, ``localpart@domain``.
        The domain in the result is the one parsed from **emailaddr**; the
        canonical domain is only used to select the rules in **domainrules**.
    """
    local, domain = extractEmailAddress(emailaddr.lower())
    canonicalDomain = canonicalizeEmailDomain(domain, domainmap)

    if ignorePlus:
        # Everything before the first '+'; the whole local part if there is
        # no '+' at all.
        local = local.partition('+')[0]

    if 'ignore_dots' in domainrules.get(canonicalDomain, []):
        local = local.replace(".", "")

    return "%s@%s" % (local, domain)
class PortList(object):
    """A container class for validated port ranges.

    From torspec.git/dir-spec.txt §2.3:

      |  portspec ::= "*" | port | port "-" port
      |  port ::= an integer between 1 and 65535, inclusive.
      |
      |    [Some implementations incorrectly generate ports with value 0.
      |     Implementations SHOULD accept this, and SHOULD NOT generate it.
      |     Connections to port 0 are never permitted.]

    :ivar set ports: All ports which have been added to this ``PortList``.
    """

    #: The maximum number of allowed ports per IP address.
    # NOTE(review): the value is not visible in the reviewed excerpt; 16 is
    # taken from the upstream file -- confirm before merging.
    PORTSPEC_LEN = 16

    def __init__(self, *args, **kwargs):
        """Create a :class:`~bridgedb.parse.addr.PortList`.

        :param args: Should match the ``portspec`` defined above.
        :raises: InvalidPort, if one of ``args`` doesn't match ``port`` as
            defined above.
        """
        self.ports = set()
        self.add(*args)

    def _sanitycheck(self, port):
        """Check that ``port`` is in the range [1, 65535] inclusive.

        :raises: InvalidPort, if ``port`` doesn't match ``port`` as defined
            in the excerpt from torspec above.
        :rtype: int
        :returns: The **port**, if no exceptions were raised.
        """
        if (not isinstance(port, int)) or not (0 < port <= 65535):
            raise InvalidPort("%s is not a valid port number!" % port)
        return port

    def __contains__(self, port):
        """Determine whether ``port`` is already in this ``PortList``.

        :returns: True if ``port`` is in this ``PortList``; False otherwise.
        """
        return port in self.ports

    def add(self, *args):
        """Add a port (or ports) to this ``PortList``.

        Each argument may be an ``int``, a ``str`` of comma-separated port
        numbers (only the first :attr:`PORTSPEC_LEN` entries are used), or
        another ``PortList``, whose ports are merged into this one.
        Arguments of any other type are ignored.

        :param args: Should match the ``portspec`` defined above.
        :raises: InvalidPort, if one of ``args`` doesn't match ``port`` as
            defined above.
        """
        for arg in args:
            portlist = []
            try:
                if isinstance(arg, str):
                    # Every entry is converted before truncating, so a
                    # non-numeric entry anywhere in the string is rejected.
                    ports = set([int(p)
                                 for p in arg.split(',')][:self.PORTSPEC_LEN])
                    portlist.extend([self._sanitycheck(p) for p in ports])
                elif isinstance(arg, int):
                    portlist.append(self._sanitycheck(arg))
                elif isinstance(arg, PortList):
                    # These ports were validated when they were added to the
                    # other PortList.  (Recursing with ``list(arg.ports)`` as
                    # a single argument, as was done previously, matched none
                    # of the branches above and silently dropped them.)
                    portlist.extend(arg.ports)
            except ValueError:
                # Covers both int() failures and InvalidPort (a ValueError
                # subclass) from _sanitycheck; report the whole argument.
                raise InvalidPort("%s is not a valid port number!" % arg)

            self.ports.update(portlist)

    def __iter__(self):
        """Iterate through all ports in this PortList."""
        return iter(self.ports)

    def __str__(self):
        """Returns a pretty string representation of this PortList."""
        return ','.join('%s' % port for port in self.ports)

    def __repr__(self):
        """Returns a raw depiction of this PortList."""
        return "PortList('%s')" % self.__str__()

    def __len__(self):
        """Returns the total number of ports in this PortList."""
        return len(self.ports)

    def __getitem__(self, port):
        """Get the port at position ``port`` of this PortList.

        The ports are held in a :class:`set`, so ``port`` is an index into a
        list built from that set on each call; the order of that list is
        whatever order the set iterates in.

        :param int port: The index to retrieve.
        :raises: IndexError, if ``port`` is out of range.
        :returns: The port number found at that index.
        """
        return list(self.ports)[port]