getting file size for all dict files to be downloaded. coming to be 400mb or so.
[worddb.git] / libs / openid / server / trustroot.py
blob42ab4bf7109b3d7d531b3b3ab8c768bdc6d59359
1 # -*- test-case-name: openid.test.test_rpverify -*-
2 """
3 This module contains the C{L{TrustRoot}} class, which helps handle
4 trust root checking. This module is used by the
5 C{L{openid.server.server}} module, but it is also available to server
6 implementers who wish to use it for additional trust root checking.
8 It also implements relying party return_to URL verification, based on
9 the realm.
10 """
# Public names exported by a star-import of this module.  Every entry
# must name something actually defined in this file; a missing name makes
# ``from <module> import *`` fail with AttributeError.
__all__ = [
    'TrustRoot',
    'RP_RETURN_TO_URL_TYPE',
    'getAllowedReturnURLs',
    'returnToMatches',
    'verifyReturnTo',
    ]
20 from openid import oidutil
21 from openid import urinorm
22 from openid.yadis import services
24 from urlparse import urlparse, urlunparse
25 import re
############################################
# URL schemes that TrustRoot.parse accepts for a trust root.
_protocols = ['http', 'https']

# Hard-coded list of recognised top-level domains.  TrustRoot.isSane
# looks up the last label of the host in this list and reports the trust
# root as not sane when it is absent, so a TLD missing from this list is
# rejected by that check.
_top_level_domains = [
    'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an',
    'ao', 'aq', 'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw',
    'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi',
    'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw',
    'by', 'bz', 'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci',
    'ck', 'cl', 'cm', 'cn', 'co', 'com', 'coop', 'cr', 'cu', 'cv',
    'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec',
    'edu', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk',
    'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh',
    'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq', 'gr', 'gs', 'gt',
    'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id',
    'ie', 'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki',
    'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc',
    'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc',
    'md', 'me', 'mg', 'mh', 'mil', 'mk', 'ml', 'mm', 'mn', 'mo',
    'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'museum', 'mv',
    'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net', 'nf',
    'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org',
    'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'pro', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru',
    'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj',
    'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv', 'sy',
    'sz', 'tc', 'td', 'tel', 'tf', 'tg', 'th', 'tj', 'tk', 'tl',
    'tm', 'tn', 'to', 'tp', 'tr', 'travel', 'tt', 'tv', 'tw',
    'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va', 'vc', 've',
    'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'xn--0zwm56d',
    'xn--11b5bs3a9aj6g', 'xn--80akhbyknj4f', 'xn--9t4b11yi5a',
    'xn--deba0ad', 'xn--g6w251d', 'xn--hgbk6aj7f53bba',
    'xn--hlcj6aya9esc7a', 'xn--jxalpdlp', 'xn--kgbechtv',
    'xn--zckzah', 'ye', 'yt', 'yu', 'za', 'zm', 'zw']
# Built from RFC 3986, section 3.2.2.  Used to reject hosts with invalid
# characters: the pattern is applied with ``match`` and ends in ``$``, so
# the host is checked from its first character through to the end.
# NOTE(review): the escape alternative accepts any two ASCII letters or
# digits after '%', not only hexadecimal digits -- confirm this is intended.
host_segment_re = re.compile(
    r"(?:[-a-zA-Z0-9!$&'\(\)\*+,;=._~]|%[a-zA-Z0-9]{2})*$")
class RealmVerificationRedirected(Exception):
    """Error signalling that verifying a realm led to a redirect.

    @ivar relying_party_url: the URL on which verification was attempted
    @ivar rp_url_after_redirects: the URL that was arrived at instead

    @since: 2.1.0
    """

    def __init__(self, relying_party_url, rp_url_after_redirects):
        self.rp_url_after_redirects = rp_url_after_redirects
        self.relying_party_url = relying_party_url

    def __str__(self):
        # Both URLs are shown with %r so that odd characters stay visible.
        template = "Attempting to verify %r resulted in redirect to %r"
        urls = (self.relying_party_url, self.rp_url_after_redirects)
        return template % urls
def _parseURL(url):
    """Normalize a URL and split it into the parts used for realm matching.

    @param url: the URL (or trust root pattern) to take apart

    @return: a C{(proto, host, port, path)} tuple, or C{None} when the
        URL cannot be normalized, its port is not purely digits, its
        network location holds more than one colon, or its host
        contains characters rejected by C{host_segment_re}.  C{host} is
        lower-cased, C{port} is C{''} when no port was given, and
        C{path} is the path re-joined with its params, query and
        fragment.
    """
    try:
        normalized = urinorm.urinorm(url)
    except ValueError:
        return None

    proto, netloc, path, params, query, frag = urlparse(normalized)
    if not path:
        # Python <2.4 does not parse URLs with no path properly
        if '?' in netloc and not query:
            netloc, query = netloc.split('?', 1)

        path = '/'

    # Everything after the authority is compared as one string.
    path = urlunparse(('', '', path, params, query, frag))

    pieces = netloc.split(':')
    if len(pieces) == 1:
        # No colon at all: there is no explicit port.
        host = netloc
        port = ''
    elif len(pieces) == 2:
        host, port = pieces
        if re.match(r'\d+$', port) is None:
            return None
    else:
        # More than one colon cannot be split into host and port.
        return None

    host = host.lower()
    if host_segment_re.match(host):
        return proto, host, port, path

    return None
class TrustRoot(object):
    """
    This class represents an OpenID trust root.  The C{L{parse}}
    classmethod accepts a trust root string, producing a
    C{L{TrustRoot}} object.  The method OpenID server implementers
    would be most likely to use is the C{L{isSane}} method, which
    checks the trust root for given patterns that indicate that the
    trust root is too broad or points to a local network resource.

    As filled in by C{L{parse}}, the attributes are:

    @ivar unparsed: the trust root string exactly as it was supplied
    @ivar proto: the URL scheme
    @ivar wildcard: whether the host started with C{*}
    @ivar host: the lower-cased host; for a wildcard trust root the
        leading C{*} has been removed
    @ivar port: the port as a string, or C{''} when none was given
    @ivar path: the path, including any params, query and fragment

    @sort: parse, isSane
    """

    def __init__(self, unparsed, proto, wildcard, host, port, path):
        self.unparsed = unparsed
        self.proto = proto
        self.wildcard = wildcard
        self.host = host
        self.port = port
        self.path = path

    def isSane(self):
        """
        This method checks to see if a trust root represents a
        reasonable (sane) set of URLs.  'http://*.com/', for example
        is not a reasonable pattern, as it cannot meaningfully specify
        the site claiming it.  This function attempts to find many
        related examples, but it can only work via heuristics.
        Negative responses from this method should be treated as
        advisory, used only to alert the user to examine the trust
        root carefully.


        @return: Whether the trust root is sane

        @rtype: C{bool}
        """

        if self.host == 'localhost':
            return True

        host_parts = self.host.split('.')
        if self.wildcard:
            # parse() strips the '*', so a wildcard host starts with '.'
            # (or is empty) and the first label is the empty string.
            assert host_parts[0] == '', host_parts
            del host_parts[0]

        # If it's an absolute domain name, remove the empty string
        # from the end.
        if host_parts and not host_parts[-1]:
            del host_parts[-1]

        if not host_parts:
            return False

        # Do not allow adjacent dots
        if '' in host_parts:
            return False

        tld = host_parts[-1]
        if tld not in _top_level_domains:
            return False

        if len(host_parts) == 1:
            return False

        if self.wildcard:
            if len(tld) == 2 and len(host_parts[-2]) <= 3:
                # It's a 2-letter tld with a short second to last segment
                # so there needs to be more than two segments specified
                # (e.g. *.co.uk is insane)
                return len(host_parts) > 2

        # Passed all tests for insanity.
        return True

    def validateURL(self, url):
        """
        Validates a URL against this trust root.


        @param url: The URL to check

        @type url: C{str}


        @return: Whether the given URL is within this trust root.
            A URL that cannot be parsed is never within it.

        @rtype: C{bool}
        """

        url_parts = _parseURL(url)
        if url_parts is None:
            return False

        proto, host, port, path = url_parts

        if proto != self.proto:
            return False

        if port != self.port:
            return False

        # A concrete URL must not itself contain a wildcard.
        if '*' in host:
            return False

        if not self.wildcard:
            if host != self.host:
                return False
        elif ((not host.endswith(self.host)) and
              ('.' + host) != self.host):
            # For a wildcard, self.host keeps its leading dot: accept
            # any host ending in it, or the bare domain itself.
            return False

        if path != self.path:
            path_len = len(self.path)
            trust_prefix = self.path[:path_len]
            url_prefix = path[:path_len]

            # must be equal up to the length of the path, at least
            if trust_prefix != url_prefix:
                return False

            # These characters must be on the boundary between the end
            # of the trust root's path and the start of the URL's
            # path.
            if '?' in self.path:
                allowed = '&'
            else:
                allowed = '?/'

            return (self.path[-1] in allowed or
                    path[path_len] in allowed)

        return True

    def parse(cls, trust_root):
        """
        This method creates a C{L{TrustRoot}} instance from the given
        input, if possible.


        @param trust_root: This is the trust root to parse into a
            C{L{TrustRoot}} object.

        @type trust_root: C{str}


        @return: A C{L{TrustRoot}} instance if trust_root parses as a
            trust root, C{None} otherwise.

        @rtype: C{NoneType} or C{L{TrustRoot}}
        """
        url_parts = _parseURL(trust_root)
        if url_parts is None:
            return None

        proto, host, port, path = url_parts

        # check for valid prototype
        if proto not in _protocols:
            return None

        # check for URI fragment
        if path.find('#') != -1:
            return None

        # extract wildcard if it is there
        if host.find('*', 1) != -1:
            # wildcard must be at start of domain: *.foo.com, not foo.*.com
            return None

        if host.startswith('*'):
            # Starts with star, so must have a dot after it (if a
            # domain is specified)
            if len(host) > 1 and host[1] != '.':
                return None

            host = host[1:]
            wildcard = True
        else:
            wildcard = False

        # we have a valid trust root
        tr = cls(trust_root, proto, wildcard, host, port, path)

        return tr

    parse = classmethod(parse)

    def checkSanity(cls, trust_root_string):
        """str -> bool

        is this a sane trust root?
        """
        trust_root = cls.parse(trust_root_string)
        if trust_root is None:
            return False
        else:
            return trust_root.isSane()

    checkSanity = classmethod(checkSanity)

    def checkURL(cls, trust_root, url):
        """quick func for validating a url against a trust root.  See the
        TrustRoot class if you need more control."""
        tr = cls.parse(trust_root)
        return tr is not None and tr.validateURL(url)

    checkURL = classmethod(checkURL)

    def buildDiscoveryURL(self):
        """Return a discovery URL for this realm.

        For a wildcard realm the C{*} is replaced by C{www}; the
        scheme, port (when one is set) and path are kept.  For any
        other realm the original, unparsed string is returned.

        This function does not check to make sure that the realm is
        valid. Its behaviour on invalid inputs is undefined.

        @rtype: str

        @returns: The URL upon which relying party discovery should be run
            in order to verify the return_to URL

        @since: 2.1.0
        """
        if self.wildcard:
            # Use "www." in place of the star
            assert self.host.startswith('.'), self.host
            www_domain = 'www' + self.host
            if self.port:
                # Keep an explicit port; without it discovery would be
                # run against a different port than the realm names.
                www_domain = '%s:%s' % (www_domain, self.port)
            return '%s://%s%s' % (self.proto, www_domain, self.path)
        else:
            return self.unparsed

    def __repr__(self):
        return "TrustRoot('%s', '%s', '%s', '%s', '%s', '%s')" % (
            self.unparsed, self.proto, self.wildcard, self.host, self.port,
            self.path)

    def __str__(self):
        return repr(self)
# The URI for relying party discovery, used in realm verification.
# _extractReturnURL passes it to the endpoint's matchTypes() to pick out
# return_to service endpoints.
#
# XXX: This should probably live somewhere else (like in
# openid.consumer or openid.yadis somewhere)
RP_RETURN_TO_URL_TYPE = 'http://specs.openid.net/auth/2.0/return_to'
def _extractReturnURL(endpoint):
    """Yadis filter that keeps only relying party return_to endpoints.

    This function is intended to be used as a filter for the Yadis
    filtering interface.

    @see: C{L{openid.yadis.services}}
    @see: C{L{openid.yadis.filters}}

    @param endpoint: An XRDS BasicServiceEndpoint, as returned by
        performing Yadis discovery.

    @returns: The endpoint's URI when the endpoint matches
        C{RP_RETURN_TO_URL_TYPE}, otherwise None.
    @rtype: str or NoneType
    """
    if not endpoint.matchTypes([RP_RETURN_TO_URL_TYPE]):
        return None

    return endpoint.uri
def returnToMatches(allowed_return_to_urls, return_to):
    """Report whether return_to falls under any of the allowed
    return_to URLs.

    Each allowed URL is treated like a realm pattern, except that
    patterns that fail to parse or that use a wildcard are ignored.

    @param allowed_return_to_urls: iterable of allowed return_to URLs
    @param return_to: the return_to URL to check

    @returns: True as soon as one allowed URL matches, False if none do

    @since: 2.1.0
    """
    for candidate in allowed_return_to_urls:
        # A return_to pattern works the same as a realm, so it is
        # parsed as one.
        pattern = TrustRoot.parse(candidate)

        # Skip anything that does not parse as a trust root.
        if pattern is None:
            continue

        # Unlike a realm, a return_to pattern may not use a wildcard.
        if pattern.wildcard:
            continue

        if pattern.validateURL(return_to):
            return True

    # No URL in the list matched
    return False
def getAllowedReturnURLs(relying_party_url):
    """Run discovery on a relying party URL and list its return_to URLs.

    @param relying_party_url: the discovery URL of the relying party

    @returns: the return_to URLs produced by discovery, filtered
        through C{_extractReturnURL}

    @raises RealmVerificationRedirected: when the URL reported by
        discovery differs from C{relying_party_url}

    @since: 2.1.0
    """
    discovered = services.getServiceEndpoints(
        relying_party_url, _extractReturnURL)
    final_url, return_to_urls = discovered

    if final_url == relying_party_url:
        return return_to_urls

    # Verification caused a redirect
    raise RealmVerificationRedirected(relying_party_url, final_url)
# _vrfy parameter is there to make testing easier
def verifyReturnTo(realm_str, return_to, _vrfy=getAllowedReturnURLs):
    """Verify that a return_to URL is valid for the given realm.

    This function builds a discovery URL, performs Yadis discovery on
    it, makes sure that the URL does not redirect, parses out the
    return_to URLs, and finally checks to see if the current return_to
    URL matches one of them.

    @param realm_str: the realm (trust root) string to verify against
    @param return_to: the return_to URL being checked
    @param _vrfy: callable that takes the discovery URL and returns the
        allowed return_to URLs; replaceable for testing

    @raises DiscoveryFailure: When Yadis discovery fails
    @returns: True if the return_to URL is valid for the realm; False
        if the realm does not parse, discovery was redirected, or no
        discovered URL matches

    @since: 2.1.0
    """
    realm = TrustRoot.parse(realm_str)
    if realm is None:
        # The realm does not parse as a URL pattern
        return False

    try:
        allowable_urls = _vrfy(realm.buildDiscoveryURL())
    except RealmVerificationRedirected as err:
        # A redirect is logged and treated as a failed verification
        # rather than propagated to the caller.
        oidutil.log(str(err))
        return False

    if returnToMatches(allowable_urls, return_to):
        return True
    else:
        oidutil.log("Failed to validate return_to %r for realm %r, was not "
                    "in %s" % (return_to, realm_str, allowable_urls))
        return False