# Generic URI splitter, taken from appendix B of RFC 3986
# (http://www.ietf.org/rfc/rfc3986.txt).  Capture groups:
#   2 = scheme            4 = authority         5 = path
#   6 = '?' + query       7 = query
#   8 = '#' + fragment    9 = fragment
# Every component is optional, so uri_re.match() never returns None;
# groups for absent optional components are None.
uri_pattern = r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
uri_re = re.compile(uri_pattern)
# Splits a URI authority component into three groups:
#   1 = userinfo, including its trailing '@'
#   2 = host
#   3 = port, including its leading ':'
# Groups 1 and 3 are None when the corresponding part is absent.
authority_pattern = r'^([^@]*@)?([^:]*)(:.*)?'
authority_re = re.compile(authority_pattern)
# Matches a single percent-encoded octet ("%" followed by exactly two
# hex digits, either case); group 1 holds the two hex digits.
pct_encoded_pattern = r'%([0-9A-Fa-f]{2})'
pct_encoded_re = re.compile(pct_encoded_pattern)
# Lookup table indexed by character code (0-255): an entry is True when
# that character is in the RFC 3986 "unreserved" set
# (ALPHA / DIGIT / "-" / "." / "_" / "~") and False otherwise.
_unreserved = [False] * 256
for _char in ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
              'abcdefghijklmnopqrstuvwxyz'
              '0123456789'
              '-._~'):
    _unreserved[ord(_char)] = True
# Do not leave the loop variable behind in the module namespace.
del _char
# Character class covering every code-point range listed in UCSCHAR and
# IPRIVATE.  Each entry is unpacked as a (first, last) pair and rendered
# as a "first-last" range inside one [...] class.
# NOTE(review): UCSCHAR and IPRIVATE are defined outside this block;
# presumably lists of inclusive (first, last) integer pairs -- confirm.
# The pairs are unpacked in the generator's "for" clause rather than via
# a tuple-parameter lambda, which newer Python versions reject.
_escapeme_re = re.compile('[%s]' % (''.join(
    u'%s-%s' % (unichr(first), unichr(last))
    for first, last in UCSCHAR + IPRIVATE),))
def _pct_escape_unicode(char_match):
    """Percent-encode the UTF-8 octets of the text matched by *char_match*.

    Has the signature of a ``re.sub`` replacement callback.

    Args:
        char_match: a regular-expression match object; the whole matched
            text (``group()``) is what gets encoded.

    Returns:
        One ``%XX`` triplet (upper-case hex, always two digits) per UTF-8
        octet of the matched text, concatenated into a single string.
    """
    # bytearray() yields integers on every Python version, whereas
    # iterating the encoded value directly yields one-character strings
    # on Python 2 and integers on Python 3.
    octets = bytearray(char_match.group().encode('utf-8'))
    # %02X (not %X) keeps each escape two hex digits wide even for
    # octets below 0x10.
    return ''.join(['%%%02X' % (octet,) for octet in octets])
76 def _pct_encoded_replace_unreserved(mo
):
78 i
= int(mo
.group(1), 16)
82 return mo
.group().upper()
88 def _pct_encoded_replace(mo
):
90 return chr(int(mo
.group(1), 16))
95 def remove_dot_segments(path
):
99 if path
.startswith('../'):
101 elif path
.startswith('./'):
103 elif path
.startswith('/./'):
107 elif path
.startswith('/../'):
110 result_segments
.pop()
114 result_segments
.pop()
115 elif path
== '..' or path
== '.':
121 i
= path
.find('/', i
)
124 result_segments
.append(path
[:i
])
127 return ''.join(result_segments
)
131 if isinstance(uri
, unicode):
132 uri
= _escapeme_re
.sub(_pct_escape_unicode
, uri
).encode('ascii')
134 uri_mo
= uri_re
.match(uri
)
136 scheme
= uri_mo
.group(2)
138 raise ValueError('No scheme specified')
140 scheme
= scheme
.lower()
141 if scheme
not in ('http', 'https'):
142 raise ValueError('Not an absolute HTTP or HTTPS URI: %r' % (uri
,))
144 authority
= uri_mo
.group(4)
145 if authority
is None:
146 raise ValueError('Not an absolute URI: %r' % (uri
,))
148 authority_mo
= authority_re
.match(authority
)
149 if authority_mo
is None:
150 raise ValueError('URI does not have a valid authority: %r' % (uri
,))
152 userinfo
, host
, port
= authority_mo
.groups()
159 host
= pct_encoded_re
.sub(_pct_encoded_replace
, host
)
160 host
= unicode(host
, 'utf-8').encode('idna')
166 (scheme
== 'http' and port
== ':80') or
167 (scheme
== 'https' and port
== ':443')):
172 authority
= userinfo
+ host
+ port
174 path
= uri_mo
.group(5)
175 path
= pct_encoded_re
.sub(_pct_encoded_replace_unreserved
, path
)
176 path
= remove_dot_segments(path
)
180 query
= uri_mo
.group(6)
184 fragment
= uri_mo
.group(8)
188 return scheme
+ '://' + authority
+ path
+ query
+ fragment