Getting the file size for all dict files to be downloaded; it comes to roughly 400 MB.
[worddb.git] / libs / openid / yadis / xri.py
blob3a39a6b8a71ea9f0030ca4b653589f9124e97365
1 # -*- test-case-name: openid.test.test_xri -*-
2 """Utility functions for handling XRIs.
4 @see: XRI Syntax v2.0 at the U{OASIS XRI Technical Committee<http://www.oasis-open.org/committees/tc_home.php?wg_abbrev=xri>}
5 """
7 import re
# Leading characters that identifierScheme() and rootAuthority() accept as
# the start of an XRI authority.
XRI_AUTHORITIES = ['!', '=', '@', '+', '$', '(']

try:
    _unichr = unichr
except NameError:
    # Python 3 has no unichr(); chr() is the equivalent there.
    _unichr = chr

# UCSCHAR and IPRIVATE hold inclusive (first, last) code point ranges; the
# names follow the ``ucschar`` and ``iprivate`` productions of RFC 3987.
try:
    _unichr(0x10000)
except ValueError:
    # narrow python build: code points above 0xFFFF cannot be represented
    # as a single character, so only the BMP ranges are listed.
    UCSCHAR = [
        (0xA0, 0xD7FF),
        (0xF900, 0xFDCF),
        (0xFDF0, 0xFFEF),
        ]

    IPRIVATE = [
        (0xE000, 0xF8FF),
        ]
else:
    UCSCHAR = [
        (0xA0, 0xD7FF),
        (0xF900, 0xFDCF),
        (0xFDF0, 0xFFEF),
        (0x10000, 0x1FFFD),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
        (0x40000, 0x4FFFD),
        (0x50000, 0x5FFFD),
        (0x60000, 0x6FFFD),
        (0x70000, 0x7FFFD),
        (0x80000, 0x8FFFD),
        (0x90000, 0x9FFFD),
        (0xA0000, 0xAFFFD),
        (0xB0000, 0xBFFFD),
        (0xC0000, 0xCFFFD),
        (0xD0000, 0xDFFFD),
        (0xE1000, 0xEFFFD),
        ]

    IPRIVATE = [
        (0xE000, 0xF8FF),
        (0xF0000, 0xFFFFD),
        (0x100000, 0x10FFFD),
        ]


# Character class matching any single character that falls inside one of the
# UCSCHAR or IPRIVATE ranges; iriToURI() percent-escapes every match.
# A list comprehension replaces the former tuple-parameter lambda, which is
# Python-2-only syntax.
_escapeme_re = re.compile('[%s]' % (''.join(
    [u'%s-%s' % (_unichr(m), _unichr(n)) for (m, n) in UCSCHAR + IPRIVATE]),))
def identifierScheme(identifier):
    """Classify an identifier as an XRI or a URI.

    The identifier counts as an XRI when it begins with C{xri://} or when
    its first character is listed in C{XRI_AUTHORITIES}.  Everything else,
    including the empty string, is reported as a URI.

    @returns: C{"XRI"} or C{"URI"}
    """
    if identifier.startswith('xri://'):
        return "XRI"
    # The truthiness test keeps the empty string away from the [0] lookup.
    if identifier and identifier[0] in XRI_AUTHORITIES:
        return "XRI"
    return "URI"
def toIRINormal(xri):
    """Return the IRI-normal form of an XRI.

    The C{xri://} scheme is prepended when missing, and the result is then
    passed through L{escapeForIRI}.
    """
    scheme = 'xri://'
    if xri.startswith(scheme):
        return escapeForIRI(xri)
    return escapeForIRI(scheme + xri)
# Matches one parenthesised cross-reference.  The match is non-greedy, so it
# ends at the first closing parenthesis.  Written as a raw string so the
# backslashes reach the regex engine without relying on Python passing
# unrecognised escape sequences through.
_xref_re = re.compile(r'\((.*?)\)')


def _escape_xref(xref_match):
    """Escape things that need to be escaped if they're in a cross-reference.

    @param xref_match: a match object produced by C{_xref_re}
    @returns: the whole matched text (parentheses included) with C{/},
        C{?} and C{#} replaced by C{%2F}, C{%3F} and C{%23}
    """
    xref = xref_match.group()
    xref = xref.replace('/', '%2F')
    xref = xref.replace('?', '%3F')
    xref = xref.replace('#', '%23')
    return xref


def escapeForIRI(xri):
    """Escape things that need to be escaped when transforming to an IRI.

    @param xri: the XRI string to escape
    @returns: C{xri} with every C{%} turned into C{%25} and with C{/},
        C{?} and C{#} percent-escaped inside parenthesised cross-references
    """
    # Percent signs go first so that the escapes introduced for the
    # cross-references below are not themselves escaped again.
    xri = xri.replace('%', '%25')
    xri = _xref_re.sub(_escape_xref, xri)
    return xri
def toURINormal(xri):
    """Return the URI-normal form of an XRI.

    The XRI is first converted to IRI-normal form with L{toIRINormal} and
    that IRI is then converted to a URI with L{iriToURI}.
    """
    iri = toIRINormal(xri)
    return iriToURI(iri)
def _percentEscapeUnicode(char_match):
    """Percent-encode the UTF-8 octets of the matched text.

    @param char_match: a regular-expression match object; its full matched
        text is what gets encoded
    @returns: one C{%XX} escape (two upper-case hex digits) per UTF-8 octet
    """
    c = char_match.group()
    # Iterating a bytearray yields integer octets on both Python 2 (2.6+)
    # and Python 3; iterating the encoded string and calling ord() on each
    # item only works on Python 2.
    octets = bytearray(c.encode('utf-8'))
    return ''.join(['%%%02X' % (octet,) for octet in octets])


def iriToURI(iri):
    """Transform an IRI to a URI by escaping unicode.

    @param iri: the IRI string
    @returns: C{iri} with every character matched by C{_escapeme_re}
        replaced by the percent-encoded form of its UTF-8 octets
    """
    # According to RFC 3987, section 3.1, "Mapping of IRIs to URIs"
    return _escapeme_re.sub(_percentEscapeUnicode, iri)
def providerIsAuthoritative(providerID, canonicalID):
    """Is this provider ID authoritative for this XRI?

    The provider is authoritative when it is equal to the canonical ID with
    its last C{!}-delimited segment removed.  A canonical ID that contains
    no C{!} has no such parent, so the answer is C{False}.

    @param providerID: the provider ID to check
    @param canonicalID: the canonical ID being claimed
    @returntype: bool
    """
    # rfind() rather than rindex(): a canonical ID without '!' should make
    # this predicate answer False instead of raising ValueError.
    lastbang = canonicalID.rfind('!')
    if lastbang == -1:
        return False
    parent = canonicalID[:lastbang]
    return parent == providerID
def rootAuthority(xri):
    """Return the root authority for an XRI.

    Example::

        rootAuthority("xri://@example") == "xri://@"

    @type xri: unicode
    @returntype: unicode
    @raises IndexError: if the authority part of C{xri} is empty
    @raises ValueError: if the authority starts a cross-reference with
        C{(} but contains no closing C{)}
    """
    if xri.startswith('xri://'):
        xri = xri[6:]
    # The authority is everything before the first '/'.
    authority = xri.split('/', 1)[0]
    if authority[0] == '(':
        # Cross-reference.
        # XXX: This is incorrect if someone nests cross-references so there
        #   is another close-paren in there.  Hopefully nobody does that
        #   before we have a real xriparse function.  Hopefully nobody does
        #   that *ever*.
        root = authority[:authority.index(')') + 1]
    elif authority[0] in XRI_AUTHORITIES:
        # Other XRI reference.
        root = authority[0]
    else:
        # IRI reference.  XXX: Can IRI authorities have segments?
        # The root is the text before the first '!' or '*' separator.  This
        # is the same value the first element of the fully split segment
        # list would have, without building that list.
        root = authority.split('!', 1)[0].split('*', 1)[0]

    return XRI(root)
def XRI(xri):
    """Return an XRI string in a form suitable for comparison.

    Ideally, this would do full normalization and provide comparison
    operators as per XRI Syntax.  For now the only canonicalization is
    making sure the C{xri://} scheme is present.

    @param xri: an xri string
    @type xri: unicode

    @returns: C{xri} unchanged if it already starts with C{xri://},
        otherwise C{xri} with C{xri://} prepended
    """
    scheme = 'xri://'
    if xri.startswith(scheme):
        return xri
    return scheme + xri