1 # -*- test-case-name: openid.test.test_rpverify -*-
3 This module contains the C{L{TrustRoot}} class, which helps handle
4 trust root checking. This module is used by the
5 C{L{openid.server.server}} module, but it is also available to server
6 implementers who wish to use it for additional trust root checking.
8 It also implements relying party return_to URL verification, based on
14 'RP_RETURN_TO_URL_TYPE',
15 'extractReturnToURLs',
20 from openid
import oidutil
21 from openid
import urinorm
22 from openid
.yadis
import services
24 from urlparse
import urlparse
, urlunparse
27 ############################################
28 _protocols
= ['http', 'https']
29 _top_level_domains
= [
30 'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an',
31 'ao', 'aq', 'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw',
32 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi',
33 'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw',
34 'by', 'bz', 'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci',
35 'ck', 'cl', 'cm', 'cn', 'co', 'com', 'coop', 'cr', 'cu', 'cv',
36 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec',
37 'edu', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk',
38 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh',
39 'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq', 'gr', 'gs', 'gt',
40 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id',
41 'ie', 'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is',
42 'it', 'je', 'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki',
43 'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc',
44 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc',
45 'md', 'me', 'mg', 'mh', 'mil', 'mk', 'ml', 'mm', 'mn', 'mo',
46 'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'museum', 'mv',
47 'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net', 'nf',
48 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org',
49 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
50 'pro', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru',
51 'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj',
52 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv', 'sy',
53 'sz', 'tc', 'td', 'tel', 'tf', 'tg', 'th', 'tj', 'tk', 'tl',
54 'tm', 'tn', 'to', 'tp', 'tr', 'travel', 'tt', 'tv', 'tw',
55 'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va', 'vc', 've',
56 'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'xn--0zwm56d',
57 'xn--11b5bs3a9aj6g', 'xn--80akhbyknj4f', 'xn--9t4b11yi5a',
58 'xn--deba0ad', 'xn--g6w251d', 'xn--hgbk6aj7f53bba',
59 'xn--hlcj6aya9esc7a', 'xn--jxalpdlp', 'xn--kgbechtv',
60 'xn--zckzah', 'ye', 'yt', 'yu', 'za', 'zm', 'zw']
# Built from RFC 3986, section 3.2.2 (reg-name).  Used to reject hosts
# containing characters that are not allowed in a host name.  A host is
# any run of unreserved characters, sub-delimiters and percent-encoded
# octets; a percent sign must be followed by exactly two hexadecimal
# digits.  The pattern is anchored with \Z rather than $ because $ also
# matches just before a trailing newline, which would let a host that
# ends in "\n" through.
host_segment_re = re.compile(
    r"(?:[-a-zA-Z0-9!$&'\(\)\*+,;=._~]|%[0-9a-fA-F]{2})*\Z")
67 class RealmVerificationRedirected(Exception):
68 """Attempting to verify this realm resulted in a redirect.
72 def __init__(self
, relying_party_url
, rp_url_after_redirects
):
73 self
.relying_party_url
= relying_party_url
74 self
.rp_url_after_redirects
= rp_url_after_redirects
77 return ("Attempting to verify %r resulted in "
79 (self
.relying_party_url
,
80 self
.rp_url_after_redirects
))
85 url
= urinorm
.urinorm(url
)
88 proto
, netloc
, path
, params
, query
, frag
= urlparse(url
)
90 # Python <2.4 does not parse URLs with no path properly
91 if not query
and '?' in netloc
:
92 netloc
, query
= netloc
.split('?', 1)
96 path
= urlunparse(('', '', path
, params
, query
, frag
))
100 host
, port
= netloc
.split(':')
104 if not re
.match(r
'\d+$', port
):
111 if not host_segment_re
.match(host
):
114 return proto
, host
, port
, path
116 class TrustRoot(object):
118 This class represents an OpenID trust root. The C{L{parse}}
119 classmethod accepts a trust root string, producing a
120 C{L{TrustRoot}} object. The method OpenID server implementers
121 would be most likely to use is the C{L{isSane}} method, which
122 checks the trust root for given patterns that indicate that the
123 trust root is too broad or points to a local network resource.
128 def __init__(self
, unparsed
, proto
, wildcard
, host
, port
, path
):
129 self
.unparsed
= unparsed
131 self
.wildcard
= wildcard
        This method checks to see if a trust root represents a
139 reasonable (sane) set of URLs. 'http://*.com/', for example
140 is not a reasonable pattern, as it cannot meaningfully specify
141 the site claiming it. This function attempts to find many
142 related examples, but it can only work via heuristics.
143 Negative responses from this method should be treated as
144 advisory, used only to alert the user to examine the trust
148 @return: Whether the trust root is sane
153 if self
.host
== 'localhost':
156 host_parts
= self
.host
.split('.')
158 assert host_parts
[0] == '', host_parts
161 # If it's an absolute domain name, remove the empty string
163 if host_parts
and not host_parts
[-1]:
169 # Do not allow adjacent dots
174 if tld
not in _top_level_domains
:
177 if len(host_parts
) == 1:
181 if len(tld
) == 2 and len(host_parts
[-2]) <= 3:
182 # It's a 2-letter tld with a short second to last segment
183 # so there needs to be more than two segments specified
184 # (e.g. *.co.uk is insane)
185 return len(host_parts
) > 2
187 # Passed all tests for insanity.
190 def validateURL(self
, url
):
192 Validates a URL against this trust root.
195 @param url: The URL to check
200 @return: Whether the given URL is within this trust root.
205 url_parts
= _parseURL(url
)
206 if url_parts
is None:
209 proto
, host
, port
, path
= url_parts
211 if proto
!= self
.proto
:
214 if port
!= self
.port
:
220 if not self
.wildcard
:
221 if host
!= self
.host
:
223 elif ((not host
.endswith(self
.host
)) and
224 ('.' + host
) != self
.host
):
227 if path
!= self
.path
:
228 path_len
= len(self
.path
)
229 trust_prefix
= self
.path
[:path_len
]
230 url_prefix
= path
[:path_len
]
232 # must be equal up to the length of the path, at least
233 if trust_prefix
!= url_prefix
:
236 # These characters must be on the boundary between the end
237 # of the trust root's path and the start of the URL's
244 return (self
.path
[-1] in allowed
or
245 path
[path_len
] in allowed
)
249 def parse(cls
, trust_root
):
251 This method creates a C{L{TrustRoot}} instance from the given
255 @param trust_root: This is the trust root to parse into a
256 C{L{TrustRoot}} object.
258 @type trust_root: C{str}
261 @return: A C{L{TrustRoot}} instance if trust_root parses as a
262 trust root, C{None} otherwise.
264 @rtype: C{NoneType} or C{L{TrustRoot}}
266 url_parts
= _parseURL(trust_root
)
267 if url_parts
is None:
270 proto
, host
, port
, path
= url_parts
        # check for a valid protocol
273 if proto
not in _protocols
:
276 # check for URI fragment
277 if path
.find('#') != -1:
280 # extract wildcard if it is there
281 if host
.find('*', 1) != -1:
282 # wildcard must be at start of domain: *.foo.com, not foo.*.com
285 if host
.startswith('*'):
286 # Starts with star, so must have a dot after it (if a
287 # domain is specified)
288 if len(host
) > 1 and host
[1] != '.':
296 # we have a valid trust root
297 tr
= cls(trust_root
, proto
, wilcard
, host
, port
, path
)
301 parse
= classmethod(parse
)
303 def checkSanity(cls
, trust_root_string
):
306 is this a sane trust root?
308 trust_root
= cls
.parse(trust_root_string
)
309 if trust_root
is None:
312 return trust_root
.isSane()
314 checkSanity
= classmethod(checkSanity
)
316 def checkURL(cls
, trust_root
, url
):
317 """quick func for validating a url against a trust root. See the
318 TrustRoot class if you need more control."""
319 tr
= cls
.parse(trust_root
)
320 return tr
is not None and tr
.validateURL(url
)
322 checkURL
= classmethod(checkURL
)
324 def buildDiscoveryURL(self
):
325 """Return a discovery URL for this realm.
327 This function does not check to make sure that the realm is
328 valid. Its behaviour on invalid inputs is undefined.
332 @returns: The URL upon which relying party discovery should be run
333 in order to verify the return_to URL
338 # Use "www." in place of the star
339 assert self
.host
.startswith('.'), self
.host
340 www_domain
= 'www' + self
.host
341 return '%s://%s%s' % (self
.proto
, www_domain
, self
.path
)
346 return "TrustRoot('%s', '%s', '%s', '%s', '%s', '%s')" % (
347 self
.unparsed
, self
.proto
, self
.wildcard
, self
.host
, self
.port
,
# Service type URI for relying party discovery, used during realm
# verification: _extractReturnURL matches endpoints against it.
#
# XXX: This should probably live somewhere else (like in
# openid.consumer or openid.yadis somewhere)
RP_RETURN_TO_URL_TYPE = 'http://specs.openid.net/auth/2.0/return_to'
359 def _extractReturnURL(endpoint
):
360 """If the endpoint is a relying party OpenID return_to endpoint,
361 return the endpoint URL. Otherwise, return None.
363 This function is intended to be used as a filter for the Yadis
366 @see: C{L{openid.yadis.services}}
367 @see: C{L{openid.yadis.filters}}
369 @param endpoint: An XRDS BasicServiceEndpoint, as returned by
        performing Yadis discovery.
372 @returns: The endpoint URL or None if the endpoint is not a
373 relying party endpoint.
374 @rtype: str or NoneType
376 if endpoint
.matchTypes([RP_RETURN_TO_URL_TYPE
]):
381 def returnToMatches(allowed_return_to_urls
, return_to
):
382 """Is the return_to URL under one of the supplied allowed
388 for allowed_return_to
in allowed_return_to_urls
:
389 # A return_to pattern works the same as a realm, except that
390 # it's not allowed to use a wildcard. We'll model this by
391 # parsing it as a realm, and not trying to match it if it has
394 return_realm
= TrustRoot
.parse(allowed_return_to
)
395 if (# Parses as a trust root
396 return_realm
is not None and
398 # Does not have a wildcard
399 not return_realm
.wildcard
and
401 # Matches the return_to that we passed in with it
402 return_realm
.validateURL(return_to
)
406 # No URL in the list matched
def getAllowedReturnURLs(relying_party_url):
    """Discover the return_to URLs advertised at a relying party URL.

    Service endpoints are fetched with C{services.getServiceEndpoints},
    using C{_extractReturnURL} as the endpoint filter.

    @param relying_party_url: The URL on which to run discovery.

    @raises RealmVerificationRedirected: If the URL reported back by
        discovery differs from C{relying_party_url}.

    @returns: The filtered discovery results for that URL.
    """
    discovery_result = services.getServiceEndpoints(
        relying_party_url, _extractReturnURL)
    final_url, discovered_urls = discovery_result

    # Discovery has to end at the URL it started from; any difference
    # means verification caused a redirect.
    if final_url != relying_party_url:
        raise RealmVerificationRedirected(relying_party_url, final_url)

    return discovered_urls
424 # _vrfy parameter is there to make testing easier
425 def verifyReturnTo(realm_str
, return_to
, _vrfy
=getAllowedReturnURLs
):
426 """Verify that a return_to URL is valid for the given realm.
428 This function builds a discovery URL, performs Yadis discovery on
429 it, makes sure that the URL does not redirect, parses out the
430 return_to URLs, and finally checks to see if the current return_to
431 URL matches the return_to.
433 @raises DiscoveryFailure: When Yadis discovery fails
434 @returns: True if the return_to URL is valid for the realm
438 realm
= TrustRoot
.parse(realm_str
)
440 # The realm does not parse as a URL pattern
444 allowable_urls
= _vrfy(realm
.buildDiscoveryURL())
445 except RealmVerificationRedirected
, err
:
446 oidutil
.log(str(err
))
449 if returnToMatches(allowable_urls
, return_to
):
452 oidutil
.log("Failed to validate return_to %r for realm %r, was not "
453 "in %s" % (return_to
, realm_str
, allowable_urls
))