Make the maintenance page a hardcoded response
[Melange.git] / app / python25src / urllib.py
blob12be1c9c1bfc4dbda1c7bf16937e81baf2c2e515
1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
All code except that related to URL parsing has been removed from this fork
of the original file (since it is not compatible with Google App Engine).
The original file was obtained from:
19 http://svn.python.org/view/*checkout*/python/tags/r252/Lib/urllib.py?content-type=text%2Fplain&rev=60915
20 """
22 import string
23 import sys
24 from urlparse import urljoin as basejoin
# Names exported by ``from urllib import *``.  Only the URL parsing and
# quoting helpers are listed.
__all__ = [
    "quote",
    "quote_plus",
    "unquote",
    "unquote_plus",
    "urlencode",
    "splittag",
    "basejoin",
    "unwrap",
    "splittype",
    "splithost",
    "splituser",
    "splitpasswd",
    "splitport",
    "splitnport",
    "splitquery",
    "splitattr",
    "splitvalue",
    "splitgophertype",
]

__version__ = '1.17'    # XXX This version is not always updated :-(
36 # Utilities to parse URLs (most of these return None for missing parts):
37 # unwrap('<URL:type://host/path>') --> 'type://host/path'
38 # splittype('type:opaquestring') --> 'type', 'opaquestring'
39 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
40 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
41 # splitpasswd('user:passwd') -> 'user', 'passwd'
42 # splitport('host:port') --> 'host', 'port'
43 # splitquery('/path?query') --> '/path', 'query'
44 # splittag('/path#tag') --> '/path', 'tag'
45 # splitattr('/path;attr1=value1;attr2=value2;...') ->
46 # '/path', ['attr1=value1', 'attr2=value2', ...]
47 # splitvalue('attr=value') --> 'attr', 'value'
48 # splitgophertype('/Xselector') --> 'X', 'selector'
49 # unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
def _is_unicode(x):
    """Return a true value if *x* is a ``unicode`` instance.

    When the interpreter has no ``unicode`` type the name lookup raises
    NameError and 0 is returned for every input.
    """
    try:
        return isinstance(x, unicode)
    except NameError:
        return 0
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Values that are not unicode are returned untouched.  Unicode values are
    encoded as ASCII; a UnicodeError naming the offending URL is raised when
    that is not possible.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        message = "URL " + repr(url) + " contains non-ASCII characters"
        raise UnicodeError(message)
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing ``<...>`` pair and a leading
    ``URL:`` marker, in that order; each part is optional.
    """
    text = url.strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
# Compiled lazily on the first call to splittype().
_typeprog = None

def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned in lower case.  When *url* has no leading
    ``scheme:`` part the result is ``(None, url)``.
    """
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match always starts at offset 0 and ends just past the colon.
    remainder = url[found.end():]
    return found.group(1).lower(), remainder
# Compiled lazily on the first call to splithost().
_hostprog = None

def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/' or '?'.  When *url* does not start
    with '//' the result is ``(None, url)``.
    """
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.groups()
# Compiled lazily on the first call to splituser().
_userprog = None

def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  On a match both parts are passed
    through unquote() and returned as a two-element list; without an '@'
    the result is the tuple ``(None, host)``.
    """
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
# Compiled lazily on the first call to splitpasswd().
_passwdprog = None

def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Without a ':' the result is
    ``(user, None)``.
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()
# splitport('host:port') --> 'host', 'port'
# Compiled lazily on the first call to splitport().
_portprog = None

def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  ``(host, None)`` is
    returned when there is no port.  A trailing ':' with nothing after it
    (an empty port) is also reported as "no port", and the dangling colon
    is removed from the host: splitport('host:') --> 'host', None.

    If the text after the last ':' is not purely digits, *host* is returned
    unchanged together with None.
    """
    global _portprog
    if _portprog is None:
        import re
        # '*' rather than '+' so that an empty port still matches and the
        # colon can be stripped from the host part.
        _portprog = re.compile('^(.*):([0-9]*)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    hostname, port = found.groups()
    if not port:
        # 'host:' -- empty port, treat as absent.
        return hostname, None
    return hostname, port
# Compiled lazily on the first call to splitnport().
_nportprog = None

def splitnport(host, defport=-1):
    """Split host and port, returning a numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after the last ':'.
    Return None as the port if there is a ':' but no valid number after it.
    """
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport

    hostname, port_text = found.groups()
    try:
        # int('') raises ValueError too, so an empty port also yields None.
        number = int(port_text)
    except ValueError:
        number = None
    return hostname, number
# Compiled lazily on the first call to splitquery().
_queryprog = None

def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without a '?' the result is
    ``(url, None)``.
    """
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.groups()
# Compiled lazily on the first call to splittag().
_tagprog = None

def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without a '#' the result is
    ``(url, None)``.
    """
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when *url* contains no ';'.
    """
    pieces = url.split(';')
    path = pieces.pop(0)
    return path, pieces
# Compiled lazily on the first call to splitvalue().
_valueprog = None

def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Without an '=' the result is
    ``(attr, None)``.
    """
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The type is the single character following a leading '/'.  If the
    selector does not start with '/' or has nothing after it, the result
    is ``(None, selector)``.
    """
    has_type = selector[:1] == '/' and len(selector) > 1
    if not has_type:
        return None, selector
    return selector[1], selector[2:]
# Lookup table from a two-digit hex string (all-lowercase and all-uppercase
# spellings) to the corresponding character.
_hextochr = dict((fmt % i, chr(i))
                 for i in range(256)
                 for fmt in ('%02x', '%02X'))

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits known to the lookup table
    is left in the output unchanged.
    """
    chunks = s.split('%')
    # Everything before the first '%' is literal text.
    decoded = [chunks[0]]
    for chunk in chunks[1:]:
        try:
            piece = _hextochr[chunk[:2]] + chunk[2:]
        except KeyError:
            # Not a recognised escape: put the '%' back.
            piece = '%' + chunk
        except UnicodeDecodeError:
            # Joining the decoded character with the rest of the chunk
            # failed; fall back to a unicode character for the escape.
            piece = unichr(int(chunk[:2], 16)) + chunk[2:]
        decoded.append(piece)
    return "".join(decoded)
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Every '+' is turned into a space first, then the result is passed to
    unquote().
    """
    return unquote(s.replace('+', ' '))
# Characters that quote() never escapes, whatever the `safe` argument is.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# Cache of character -> replacement tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    cachekey = (safe, always_safe)
    table = _safemaps.get(cachekey)
    if table is None:
        # First call with this `safe` value: build and cache the table that
        # maps each of the 256 characters to itself or to its %XX escape.
        unescaped = safe + always_safe
        table = {}
        for code in range(256):
            char = chr(code)
            if char in unescaped:
                table[char] = char
            else:
                table[char] = '%%%02X' % code
        _safemaps[cachekey] = table
    return ''.join([table[char] for char in s])
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unescaped during quoting so they can be turned into '+'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Arguments:
      query -- a mapping (anything with an ``items`` method) or a sequence
               of ``(key, value)`` tuples.
      doseq -- when true, a value that has a length (other than a string)
               yields one ``key=element`` parameter per element.

    Returns the ``key=value`` pairs joined with '&'; keys and values are
    escaped with quote_plus().

    Raises TypeError if *query* is neither a mapping nor a sequence whose
    first item is a tuple (for example a non-empty string).
    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Raise a fresh TypeError with a helpful message.  The traceback
            # is deliberately not stored in a local variable, which would
            # create a reference cycle with this frame.
            raise TypeError("not a valid non-string sequence or mapping object")

    pairs = []
    for k, v in query:
        k = quote_plus(str(k))
        if not doseq:
            # preserve old behavior: every value is stringified as a whole
            values = [str(v)]
        elif isinstance(v, str):
            values = [v]
        elif _is_unicode(v):
            # is there a reasonable way to convert to ASCII?
            # encode generates a string, but "replace" or "ignore"
            # lose information and "strict" can raise UnicodeError
            values = [v.encode("ASCII", "replace")]
        else:
            try:
                # is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                values = [str(v)]
            else:
                # one parameter per element of the sequence
                values = [str(elt) for elt in v]
        for value in values:
            pairs.append(k + '=' + quote_plus(value))
    return '&'.join(pairs)