libs/openid/server/trustroot.py

   1 # -*- test-case-name: openid.test.test_rpverify -*-
   2 """
   3 This module contains the C{L{TrustRoot}} class, which helps handle
   4 trust root checking.  This module is used by the
   5 C{L{openid.server.server}} module, but it is also available to server
   6 implementers who wish to use it for additional trust root checking.
   7
   8 It also implements relying party return_to URL verification, based on
   9 the realm.
  10 """
  11
# Names exported by a star-import of this module.
# NOTE(review): getAllowedReturnURLs and RealmVerificationRedirected are
# defined below without a leading underscore but are not listed here --
# confirm whether that omission is intentional.
__all__ = [
    'TrustRoot',
    'RP_RETURN_TO_URL_TYPE',
    'extractReturnToURLs',
    'returnToMatches',
    'verifyReturnTo',
    ]
  19
  20 from openid import oidutil
  21 from openid import urinorm
  22 from openid.yadis import services
  23
  24 from urlparse import urlparse, urlunparse
  25 import re
  26
  27 ############################################
# URL schemes a trust root may use; TrustRoot.parse rejects anything else.
_protocols = ['http', 'https']

# Top-level domains recognised by TrustRoot.isSane.  A trust root whose
# last host label is not in this list is reported as not sane.  The list
# is a hard-coded literal, so it only knows the TLDs spelled out here.
_top_level_domains = [
    'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an',
    'ao', 'aq', 'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw',
    'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi',
    'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw',
    'by', 'bz', 'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci',
    'ck', 'cl', 'cm', 'cn', 'co', 'com', 'coop', 'cr', 'cu', 'cv',
    'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec',
    'edu', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk',
    'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh',
    'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq', 'gr', 'gs', 'gt',
    'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id',
    'ie', 'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki',
    'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc',
    'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc',
    'md', 'me', 'mg', 'mh', 'mil', 'mk', 'ml', 'mm', 'mn', 'mo',
    'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'museum', 'mv',
    'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net', 'nf',
    'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org',
    'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'pro', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru',
    'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj',
    'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv', 'sy',
    'sz', 'tc', 'td', 'tel', 'tf', 'tg', 'th', 'tj', 'tk', 'tl',
    'tm', 'tn', 'to', 'tp', 'tr', 'travel', 'tt', 'tv', 'tw',
    'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va', 'vc', 've',
    'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'xn--0zwm56d',
    'xn--11b5bs3a9aj6g', 'xn--80akhbyknj4f', 'xn--9t4b11yi5a',
    'xn--deba0ad', 'xn--g6w251d', 'xn--hgbk6aj7f53bba',
    'xn--hlcj6aya9esc7a', 'xn--jxalpdlp', 'xn--kgbechtv',
    'xn--zckzah', 'ye', 'yt', 'yu', 'za', 'zm', 'zw']
  61
  62 # Build from RFC3986, section 3.2.2. Used to reject hosts with invalid
  63 # characters.
  64 host_segment_re = re.compile(
  65     r"(?:[-a-zA-Z0-9!$&'\(\)\*+,;=._~]|%[a-zA-Z0-9]{2})*$")
  66
  67 class RealmVerificationRedirected(Exception):
  68     """Attempting to verify this realm resulted in a redirect.
  69
  70     @since: 2.1.0
  71     """
  72     def __init__(self, relying_party_url, rp_url_after_redirects):
  73         self.relying_party_url = relying_party_url
  74         self.rp_url_after_redirects = rp_url_after_redirects
  75
  76     def __str__(self):
  77         return ("Attempting to verify %r resulted in "
  78                 "redirect to %r" %
  79                 (self.relying_party_url,
  80                  self.rp_url_after_redirects))
  81
  82
  83 def _parseURL(url):
  84     try:
  85         url = urinorm.urinorm(url)
  86     except ValueError:
  87         return None
  88     proto, netloc, path, params, query, frag = urlparse(url)
  89     if not path:
  90         # Python <2.4 does not parse URLs with no path properly
  91         if not query and '?' in netloc:
  92             netloc, query = netloc.split('?', 1)
  93
  94         path = '/'
  95
  96     path = urlunparse(('', '', path, params, query, frag))
  97
  98     if ':' in netloc:
  99         try:
 100             host, port = netloc.split(':')
 101         except ValueError:
 102             return None
 103
 104         if not re.match(r'\d+$', port):
 105             return None
 106     else:
 107         host = netloc
 108         port = ''
 109
 110     host = host.lower()
 111     if not host_segment_re.match(host):
 112         return None
 113
 114     return proto, host, port, path
 115
 116 class TrustRoot(object):
 117     """
 118     This class represents an OpenID trust root.  The C{L{parse}}
 119     classmethod accepts a trust root string, producing a
 120     C{L{TrustRoot}} object.  The method OpenID server implementers
 121     would be most likely to use is the C{L{isSane}} method, which
 122     checks the trust root for given patterns that indicate that the
 123     trust root is too broad or points to a local network resource.
 124
 125     @sort: parse, isSane
 126     """
 127
 128     def __init__(self, unparsed, proto, wildcard, host, port, path):
 129         self.unparsed = unparsed
 130         self.proto = proto
 131         self.wildcard = wildcard
 132         self.host = host
 133         self.port = port
 134         self.path = path
 135
 136     def isSane(self):
 137         """
 138         This method checks the to see if a trust root represents a
 139         reasonable (sane) set of URLs.  'http://*.com/', for example
 140         is not a reasonable pattern, as it cannot meaningfully specify
 141         the site claiming it.  This function attempts to find many
 142         related examples, but it can only work via heuristics.
 143         Negative responses from this method should be treated as
 144         advisory, used only to alert the user to examine the trust
 145         root carefully.
 146
 147
 148         @return: Whether the trust root is sane
 149
 150         @rtype: C{bool}
 151         """
 152
 153         if self.host == 'localhost':
 154             return True
 155
 156         host_parts = self.host.split('.')
 157         if self.wildcard:
 158             assert host_parts[0] == '', host_parts
 159             del host_parts[0]
 160
 161         # If it's an absolute domain name, remove the empty string
 162         # from the end.
 163         if host_parts and not host_parts[-1]:
 164             del host_parts[-1]
 165
 166         if not host_parts:
 167             return False
 168
 169         # Do not allow adjacent dots
 170         if '' in host_parts:
 171             return False
 172
 173         tld = host_parts[-1]
 174         if tld not in _top_level_domains:
 175             return False
 176
 177         if len(host_parts) == 1:
 178             return False
 179
 180         if self.wildcard:
 181             if len(tld) == 2 and len(host_parts[-2]) <= 3:
 182                 # It's a 2-letter tld with a short second to last segment
 183                 # so there needs to be more than two segments specified
 184                 # (e.g. *.co.uk is insane)
 185                 return len(host_parts) > 2
 186
 187         # Passed all tests for insanity.
 188         return True
 189
 190     def validateURL(self, url):
 191         """
 192         Validates a URL against this trust root.
 193
 194
 195         @param url: The URL to check
 196
 197         @type url: C{str}
 198
 199
 200         @return: Whether the given URL is within this trust root.
 201
 202         @rtype: C{bool}
 203         """
 204
 205         url_parts = _parseURL(url)
 206         if url_parts is None:
 207             return False
 208
 209         proto, host, port, path = url_parts
 210
 211         if proto != self.proto:
 212             return False
 213
 214         if port != self.port:
 215             return False
 216
 217         if '*' in host:
 218             return False
 219
 220         if not self.wildcard:
 221             if host != self.host:
 222                 return False
 223         elif ((not host.endswith(self.host)) and
 224               ('.' + host) != self.host):
 225             return False
 226
 227         if path != self.path:
 228             path_len = len(self.path)
 229             trust_prefix = self.path[:path_len]
 230             url_prefix = path[:path_len]
 231
 232             # must be equal up to the length of the path, at least
 233             if trust_prefix != url_prefix:
 234                 return False
 235
 236             # These characters must be on the boundary between the end
 237             # of the trust root's path and the start of the URL's
 238             # path.
 239             if '?' in self.path:
 240                 allowed = '&'
 241             else:
 242                 allowed = '?/'
 243
 244             return (self.path[-1] in allowed or
 245                 path[path_len] in allowed)
 246
 247         return True
 248
 249     def parse(cls, trust_root):
 250         """
 251         This method creates a C{L{TrustRoot}} instance from the given
 252         input, if possible.
 253
 254
 255         @param trust_root: This is the trust root to parse into a
 256         C{L{TrustRoot}} object.
 257
 258         @type trust_root: C{str}
 259
 260
 261         @return: A C{L{TrustRoot}} instance if trust_root parses as a
 262         trust root, C{None} otherwise.
 263
 264         @rtype: C{NoneType} or C{L{TrustRoot}}
 265         """
 266         url_parts = _parseURL(trust_root)
 267         if url_parts is None:
 268             return None
 269
 270         proto, host, port, path = url_parts
 271
 272         # check for valid prototype
 273         if proto not in _protocols:
 274             return None
 275
 276         # check for URI fragment
 277         if path.find('#') != -1:
 278             return None
 279
 280         # extract wildcard if it is there
 281         if host.find('*', 1) != -1:
 282             # wildcard must be at start of domain:  *.foo.com, not foo.*.com
 283             return None
 284
 285         if host.startswith('*'):
 286             # Starts with star, so must have a dot after it (if a
 287             # domain is specified)
 288             if len(host) > 1 and host[1] != '.':
 289                 return None
 290
 291             host = host[1:]
 292             wilcard = True
 293         else:
 294             wilcard = False
 295
 296         # we have a valid trust root
 297         tr = cls(trust_root, proto, wilcard, host, port, path)
 298
 299         return tr
 300
 301     parse = classmethod(parse)
 302
 303     def checkSanity(cls, trust_root_string):
 304         """str -> bool
 305
 306         is this a sane trust root?
 307         """
 308         trust_root = cls.parse(trust_root_string)
 309         if trust_root is None:
 310             return False
 311         else:
 312             return trust_root.isSane()
 313
 314     checkSanity = classmethod(checkSanity)
 315
 316     def checkURL(cls, trust_root, url):
 317         """quick func for validating a url against a trust root.  See the
 318         TrustRoot class if you need more control."""
 319         tr = cls.parse(trust_root)
 320         return tr is not None and tr.validateURL(url)
 321
 322     checkURL = classmethod(checkURL)
 323
 324     def buildDiscoveryURL(self):
 325         """Return a discovery URL for this realm.
 326
 327         This function does not check to make sure that the realm is
 328         valid. Its behaviour on invalid inputs is undefined.
 329
 330         @rtype: str
 331
 332         @returns: The URL upon which relying party discovery should be run
 333             in order to verify the return_to URL
 334
 335         @since: 2.1.0
 336         """
 337         if self.wildcard:
 338             # Use "www." in place of the star
 339             assert self.host.startswith('.'), self.host
 340             www_domain = 'www' + self.host
 341             return '%s://%s%s' % (self.proto, www_domain, self.path)
 342         else:
 343             return self.unparsed
 344
 345     def __repr__(self):
 346         return "TrustRoot('%s', '%s', '%s', '%s', '%s', '%s')" % (
 347             self.unparsed, self.proto, self.wildcard, self.host, self.port,
 348             self.path)
 349
 350     def __str__(self):
 351         return repr(self)
 352
# The service type URI for relying party discovery, used in realm
# verification: an endpoint that advertises this type is treated as a
# return_to endpoint (see _extractReturnURL).
#
# XXX: This should probably live somewhere else (like in
# openid.consumer or openid.yadis somewhere)
RP_RETURN_TO_URL_TYPE = 'http://specs.openid.net/auth/2.0/return_to'
 358
 359 def _extractReturnURL(endpoint):
 360     """If the endpoint is a relying party OpenID return_to endpoint,
 361     return the endpoint URL. Otherwise, return None.
 362
 363     This function is intended to be used as a filter for the Yadis
 364     filtering interface.
 365
 366     @see: C{L{openid.yadis.services}}
 367     @see: C{L{openid.yadis.filters}}
 368
 369     @param endpoint: An XRDS BasicServiceEndpoint, as returned by
 370         performing Yadis dicovery.
 371
 372     @returns: The endpoint URL or None if the endpoint is not a
 373         relying party endpoint.
 374     @rtype: str or NoneType
 375     """
 376     if endpoint.matchTypes([RP_RETURN_TO_URL_TYPE]):
 377         return endpoint.uri
 378     else:
 379         return None
 380
 381 def returnToMatches(allowed_return_to_urls, return_to):
 382     """Is the return_to URL under one of the supplied allowed
 383     return_to URLs?
 384
 385     @since: 2.1.0
 386     """
 387
 388     for allowed_return_to in allowed_return_to_urls:
 389         # A return_to pattern works the same as a realm, except that
 390         # it's not allowed to use a wildcard. We'll model this by
 391         # parsing it as a realm, and not trying to match it if it has
 392         # a wildcard.
 393
 394         return_realm = TrustRoot.parse(allowed_return_to)
 395         if (# Parses as a trust root
 396             return_realm is not None and
 397
 398             # Does not have a wildcard
 399             not return_realm.wildcard and
 400
 401             # Matches the return_to that we passed in with it
 402             return_realm.validateURL(return_to)
 403             ):
 404             return True
 405
 406     # No URL in the list matched
 407     return False
 408
 409 def getAllowedReturnURLs(relying_party_url):
 410     """Given a relying party discovery URL return a list of return_to URLs.
 411
 412     @since: 2.1.0
 413     """
 414     (rp_url_after_redirects, return_to_urls) = services.getServiceEndpoints(
 415         relying_party_url, _extractReturnURL)
 416
 417     if rp_url_after_redirects != relying_party_url:
 418         # Verification caused a redirect
 419         raise RealmVerificationRedirected(
 420             relying_party_url, rp_url_after_redirects)
 421
 422     return return_to_urls
 423
 424 # _vrfy parameter is there to make testing easier
 425 def verifyReturnTo(realm_str, return_to, _vrfy=getAllowedReturnURLs):
 426     """Verify that a return_to URL is valid for the given realm.
 427
 428     This function builds a discovery URL, performs Yadis discovery on
 429     it, makes sure that the URL does not redirect, parses out the
 430     return_to URLs, and finally checks to see if the current return_to
 431     URL matches the return_to.
 432
 433     @raises DiscoveryFailure: When Yadis discovery fails
 434     @returns: True if the return_to URL is valid for the realm
 435
 436     @since: 2.1.0
 437     """
 438     realm = TrustRoot.parse(realm_str)
 439     if realm is None:
 440         # The realm does not parse as a URL pattern
 441         return False
 442
 443     try:
 444         allowable_urls = _vrfy(realm.buildDiscoveryURL())
 445     except RealmVerificationRedirected, err:
 446         oidutil.log(str(err))
 447         return False
 448
 449     if returnToMatches(allowable_urls, return_to):
 450         return True
 451     else:
 452         oidutil.log("Failed to validate return_to %r for realm %r, was not "
 453                     "in %s" % (return_to, realm_str, allowable_urls))
 454         return False