1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that it has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
31 __version__
= '1.12' # XXX This version is not always updated :-(
33 MAXFTPCACHE
= 10 # Trim the ftp cache beyond this size
35 # Helper for non-unix systems
37 from macurl2path
import url2pathname
, pathname2url
39 from nturl2path
import url2pathname
, pathname2url
def url2pathname(pathname):
    """Turn the path part of a URL into a local file name.

    This generic version only undoes the %XX escapes by delegating to
    unquote(); no platform-specific path translation is performed here.
    """
    local_name = unquote(pathname)
    return local_name
def pathname2url(pathname):
    """Turn a local file name into the path part of a URL.

    This generic version only applies %XX escaping by delegating to
    quote(); no platform-specific path translation is performed here.
    """
    url_path = quote(pathname)
    return url_path
46 # This really consists of two pieces:
47 # (1) a class which handles opening of all sorts of URLs
48 # (plus assorted utilities etc.)
49 # (2) a set of functions for parsing URLs
50 # XXX Should these be separated out into different modules?
53 # Shortcut for basic usage
55 def urlopen(url
, data
=None):
58 _urlopener
= FancyURLopener()
60 return _urlopener
.open(url
)
62 return _urlopener
.open(url
, data
)
63 def urlretrieve(url
, filename
=None, reporthook
=None):
66 _urlopener
= FancyURLopener()
67 return _urlopener
.retrieve(url
, filename
, reporthook
)
75 """Class to open URLs.
76 This is a class rather than just a subroutine because we may need
77 more than one set of global protocol-specific options.
78 Note -- this is a base class for those who don't want the
79 automatic handling of errors type 302 (relocated) and 401
80 (authorization needed)."""
85 def __init__(self
, proxies
=None, **x509
):
87 proxies
= getproxies()
88 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
89 self
.proxies
= proxies
90 self
.key_file
= x509
.get('key_file')
91 self
.cert_file
= x509
.get('cert_file')
92 server_version
= "Python-urllib/%s" % __version__
93 self
.addheaders
= [('User-agent', server_version
)]
95 self
.__unlink
= os
.unlink
# See cleanup()
97 # Undocumented feature: if you assign {} to tempcache,
98 # it is used to cache files retrieved with
99 # self.retrieve(). This is not enabled by default
100 # since it does not work for changing documents (and I
101 # haven't got the logic to check expiration headers
103 self
.ftpcache
= ftpcache
104 # Undocumented feature: you can use a different
105 # ftp cache by assigning to the .ftpcache member;
106 # in case you want logically independent URL openers
107 # XXX This is not threadsafe. Bah.
116 # This code sometimes runs when the rest of this module
117 # has already been deleted, so it can't use any globals
118 # or import anything.
120 for file in self
.__tempfiles
:
125 del self
.__tempfiles
[:]
127 self
.tempcache
.clear()
def addheader(self, *args):
    """Register an extra header for the HTTP interface.

    The positional arguments are stored together as one tuple at the
    end of self.addheaders, e.g. u.addheader('Accept', 'sound/basic').
    """
    header = args  # *args already arrives as a tuple; store it as-is
    registered = self.addheaders
    registered.append(header)
135 def open(self
, fullurl
, data
=None):
136 """Use URLopener().open(file) instead of open(file, 'r')."""
137 fullurl
= unwrap(fullurl
)
138 if self
.tempcache
and self
.tempcache
.has_key(fullurl
):
139 filename
, headers
= self
.tempcache
[fullurl
]
140 fp
= open(filename
, 'rb')
141 return addinfourl(fp
, headers
, fullurl
)
142 type, url
= splittype(fullurl
)
143 if not type: type = 'file'
144 if self
.proxies
.has_key(type):
145 proxy
= self
.proxies
[type]
146 type, proxy
= splittype(proxy
)
147 host
, selector
= splithost(proxy
)
148 url
= (host
, fullurl
) # Signal special case to open_*()
149 name
= 'open_' + type
153 name
= string
.join(string
.split(name
, '-'), '_')
154 if not hasattr(self
, name
):
156 return self
.open_unknown(fullurl
)
158 return self
.open_unknown(fullurl
, data
)
161 return getattr(self
, name
)(url
)
163 return getattr(self
, name
)(url
, data
)
164 except socket
.error
, msg
:
165 raise IOError, ('socket error', msg
), sys
.exc_info()[2]
def open_unknown(self, fullurl, data=None):
    """Hook called for URL types that have no open_<type> method.

    Subclasses may override it.  This default implementation never
    returns: it raises IOError with the arguments
    ('url error', 'unknown url type', <scheme of fullurl>).
    The data argument is accepted for interface symmetry and ignored.
    """
    # Only the scheme is reported; the remainder of the URL is discarded.
    scheme, _remainder = splittype(fullurl)
    raise IOError('url error', 'unknown url type', scheme)
173 def retrieve(self
, url
, filename
=None, reporthook
=None):
174 """retrieve(url) returns (filename, None) for a local object
175 or (tempfilename, headers) for a remote object."""
177 if self
.tempcache
and self
.tempcache
.has_key(url
):
178 return self
.tempcache
[url
]
179 type, url1
= splittype(url
)
180 if not filename
and (not type or type == 'file'):
182 fp
= self
.open_local_file(url1
)
185 return url2pathname(splithost(url1
)[1]), hdrs
192 garbage
, path
= splittype(url
)
193 garbage
, path
= splithost(path
or "")
194 path
, garbage
= splitquery(path
or "")
195 path
, garbage
= splitattr(path
or "")
196 suffix
= os
.path
.splitext(path
)[1]
197 filename
= tempfile
.mktemp(suffix
)
198 self
.__tempfiles
.append(filename
)
199 result
= filename
, headers
200 if self
.tempcache
is not None:
201 self
.tempcache
[url
] = result
202 tfp
= open(filename
, 'wb')
207 if headers
.has_key("content-length"):
208 size
= int(headers
["Content-Length"])
209 reporthook(0, bs
, size
)
212 reporthook(1, bs
, size
)
216 blocknum
= blocknum
+ 1
218 reporthook(blocknum
, bs
, size
)
225 # Each method named open_<type> knows how to open that type of URL
227 def open_http(self
, url
, data
=None):
228 """Use HTTP protocol."""
231 if type(url
) is type(""):
232 host
, selector
= splithost(url
)
234 user_passwd
, host
= splituser(host
)
239 urltype
, rest
= splittype(selector
)
242 if string
.lower(urltype
) != 'http':
245 realhost
, rest
= splithost(rest
)
247 user_passwd
, realhost
= splituser(realhost
)
249 selector
= "%s://%s%s" % (urltype
, realhost
, rest
)
250 #print "proxy via http:", host, selector
251 if not host
: raise IOError, ('http error', 'no host given')
254 auth
= string
.strip(base64
.encodestring(user_passwd
))
257 h
= httplib
.HTTP(host
)
259 h
.putrequest('POST', selector
)
260 h
.putheader('Content-type', 'application/x-www-form-urlencoded')
261 h
.putheader('Content-length', '%d' % len(data
))
263 h
.putrequest('GET', selector
)
264 if auth
: h
.putheader('Authorization', 'Basic %s' % auth
)
265 if realhost
: h
.putheader('Host', realhost
)
266 for args
in self
.addheaders
: apply(h
.putheader
, args
)
269 h
.send(data
+ '\r\n')
270 errcode
, errmsg
, headers
= h
.getreply()
273 return addinfourl(fp
, headers
, "http:" + url
)
276 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
)
278 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
, data
)
280 def http_error(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
281 """Handle http errors.
282 Derived class can override this, or provide specific handlers
283 named http_error_DDD where DDD is the 3-digit error code."""
284 # First check if there's a specific handler for this error
285 name
= 'http_error_%d' % errcode
286 if hasattr(self
, name
):
287 method
= getattr(self
, name
)
289 result
= method(url
, fp
, errcode
, errmsg
, headers
)
291 result
= method(url
, fp
, errcode
, errmsg
, headers
, data
)
292 if result
: return result
293 return self
.http_error_default(url
, fp
, errcode
, errmsg
, headers
)
295 def http_error_default(self
, url
, fp
, errcode
, errmsg
, headers
):
296 """Default error handler: close the connection and raise IOError."""
299 raise IOError, ('http error', errcode
, errmsg
, headers
)
301 if hasattr(socket
, "ssl"):
302 def open_https(self
, url
):
303 """Use HTTPS protocol."""
305 if type(url
) is type(""):
306 host
, selector
= splithost(url
)
307 user_passwd
, host
= splituser(host
)
310 urltype
, rest
= splittype(selector
)
311 if string
.lower(urltype
) == 'https':
312 realhost
, rest
= splithost(rest
)
313 user_passwd
, realhost
= splituser(realhost
)
315 selector
= "%s://%s%s" % (urltype
, realhost
, rest
)
316 print "proxy via https:", host
, selector
317 if not host
: raise IOError, ('https error', 'no host given')
320 auth
= string
.strip(base64
.encodestring(user_passwd
))
323 h
= httplib
.HTTPS(host
, 0,
324 key_file
=self
.key_file
,
325 cert_file
=self
.cert_file
)
326 h
.putrequest('GET', selector
)
327 if auth
: h
.putheader('Authorization: Basic %s' % auth
)
328 for args
in self
.addheaders
: apply(h
.putheader
, args
)
330 errcode
, errmsg
, headers
= h
.getreply()
333 return addinfourl(fp
, headers
, url
)
335 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
)
337 def open_gopher(self
, url
):
338 """Use Gopher protocol."""
340 host
, selector
= splithost(url
)
341 if not host
: raise IOError, ('gopher error', 'no host given')
343 type, selector
= splitgophertype(selector
)
344 selector
, query
= splitquery(selector
)
345 selector
= unquote(selector
)
347 query
= unquote(query
)
348 fp
= gopherlib
.send_query(selector
, query
, host
)
350 fp
= gopherlib
.send_selector(selector
, host
)
351 return addinfourl(fp
, noheaders(), "gopher:" + url
)
353 def open_file(self
, url
):
354 """Use local file or FTP depending on form of URL."""
355 if url
[:2] == '//' and url
[2:3] != '/':
356 return self
.open_ftp(url
)
358 return self
.open_local_file(url
)
360 def open_local_file(self
, url
):
361 """Use local file."""
362 import mimetypes
, mimetools
, StringIO
363 mtype
= mimetypes
.guess_type(url
)[0]
364 headers
= mimetools
.Message(StringIO
.StringIO(
365 'Content-Type: %s\n' % (mtype
or 'text/plain')))
366 host
, file = splithost(url
)
370 urlfile
= 'file://' + file
371 return addinfourl(open(url2pathname(file), 'rb'),
373 host
, port
= splitport(host
)
375 and socket
.gethostbyname(host
) in (localhost(), thishost()):
378 urlfile
= 'file://' + file
379 return addinfourl(open(url2pathname(file), 'rb'),
381 raise IOError, ('local file error', 'not on local host')
383 def open_ftp(self
, url
):
384 """Use FTP protocol."""
385 host
, path
= splithost(url
)
386 if not host
: raise IOError, ('ftp error', 'no host given')
387 host
, port
= splitport(host
)
388 user
, host
= splituser(host
)
389 if user
: user
, passwd
= splitpasswd(user
)
392 user
= unquote(user
or '')
393 passwd
= unquote(passwd
or '')
394 host
= socket
.gethostbyname(host
)
397 port
= ftplib
.FTP_PORT
400 path
, attrs
= splitattr(path
)
402 dirs
= string
.splitfields(path
, '/')
403 dirs
, file = dirs
[:-1], dirs
[-1]
404 if dirs
and not dirs
[0]: dirs
= dirs
[1:]
405 if dirs
and not dirs
[0]: dirs
[0] = '/'
406 key
= (user
, host
, port
, string
.joinfields(dirs
, '/'))
408 if len(self
.ftpcache
) > MAXFTPCACHE
:
409 # Prune the cache, rather arbitrarily
410 for k
in self
.ftpcache
.keys():
416 if not self
.ftpcache
.has_key(key
):
417 self
.ftpcache
[key
] = \
418 ftpwrapper(user
, passwd
, host
, port
, dirs
)
419 if not file: type = 'D'
422 attr
, value
= splitvalue(attr
)
423 if string
.lower(attr
) == 'type' and \
424 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
425 type = string
.upper(value
)
426 (fp
, retrlen
) = self
.ftpcache
[key
].retrfile(file, type)
427 if retrlen
is not None and retrlen
>= 0:
428 import mimetools
, StringIO
429 headers
= mimetools
.Message(StringIO
.StringIO(
430 'Content-Length: %d\n' % retrlen
))
432 headers
= noheaders()
433 return addinfourl(fp
, headers
, "ftp:" + url
)
434 except ftperrors(), msg
:
435 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
437 def open_data(self
, url
, data
=None):
438 """Use "data" URL."""
441 # syntax of data URLs:
442 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
443 # mediatype := [ type "/" subtype ] *( ";" parameter )
445 # parameter := attribute "=" value
446 import StringIO
, mimetools
, time
448 [type, data
] = string
.split(url
, ',', 1)
450 raise IOError, ('data error', 'bad data URL')
452 type = 'text/plain;charset=US-ASCII'
453 semi
= string
.rfind(type, ';')
454 if semi
>= 0 and '=' not in type[semi
:]:
455 encoding
= type[semi
+1:]
460 msg
.append('Date: %s'%time
.strftime('%a, %d %b %Y %T GMT',
461 time
.gmtime(time
.time())))
462 msg
.append('Content-type: %s' % type)
463 if encoding
== 'base64':
465 data
= base64
.decodestring(data
)
468 msg
.append('Content-length: %d' % len(data
))
471 msg
= string
.join(msg
, '\n')
472 f
= StringIO
.StringIO(msg
)
473 headers
= mimetools
.Message(f
, 0)
474 f
.fileno
= None # needed for addinfourl
475 return addinfourl(f
, headers
, url
)
478 class FancyURLopener(URLopener
):
479 """Derived class with handlers for errors we can handle (perhaps)."""
481 def __init__(self
, *args
):
482 apply(URLopener
.__init
__, (self
,) + args
)
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Fallback HTTP error handler that does not raise.

    The error response is handed back to the caller as an ordinary
    addinfourl object wrapping fp, carrying the reply headers and the
    URL re-prefixed with "http:".  errcode and errmsg are not used.
    """
    full_url = "http:" + url
    return addinfourl(fp, headers, full_url)
489 def http_error_302(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
490 """Error 302 -- relocated (temporarily)."""
491 # XXX The server can force infinite recursion here!
492 if headers
.has_key('location'):
493 newurl
= headers
['location']
494 elif headers
.has_key('uri'):
495 newurl
= headers
['uri']
500 # In case the server sent a relative URL, join with original:
501 newurl
= basejoin("http:" + url
, newurl
)
503 return self
.open(newurl
)
505 return self
.open(newurl
, data
)
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 301 -- permanent relocation.

    Handled exactly like a 302: every argument is forwarded unchanged
    to http_error_302() and its result is returned.
    """
    redirect_handler = self.http_error_302
    return redirect_handler(url, fp, errcode, errmsg, headers, data)
511 def http_error_401(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
512 """Error 401 -- authentication required.
513 See this URL for a description of the basic authentication scheme:
514 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
515 if headers
.has_key('www-authenticate'):
516 stuff
= headers
['www-authenticate']
518 match
= re
.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff
)
520 scheme
, realm
= match
.groups()
521 if string
.lower(scheme
) == 'basic':
522 name
= 'retry_' + self
.type + '_basic_auth'
524 return getattr(self
,name
)(url
, realm
)
526 return getattr(self
,name
)(url
, realm
, data
)
528 def retry_http_basic_auth(self
, url
, realm
, data
=None):
529 host
, selector
= splithost(url
)
530 i
= string
.find(host
, '@') + 1
532 user
, passwd
= self
.get_user_passwd(host
, realm
, i
)
533 if not (user
or passwd
): return None
534 host
= user
+ ':' + passwd
+ '@' + host
535 newurl
= 'http://' + host
+ selector
537 return self
.open(newurl
)
539 return self
.open(newurl
, data
)
541 def retry_https_basic_auth(self
, url
, realm
, data
=None):
542 host
, selector
= splithost(url
)
543 i
= string
.find(host
, '@') + 1
545 user
, passwd
= self
.get_user_passwd(host
, realm
, i
)
546 if not (user
or passwd
): return None
547 host
= user
+ ':' + passwd
+ '@' + host
548 newurl
= '//' + host
+ selector
549 return self
.open_https(newurl
)
551 def get_user_passwd(self
, host
, realm
, clear_cache
= 0):
552 key
= realm
+ '@' + string
.lower(host
)
553 if self
.auth_cache
.has_key(key
):
555 del self
.auth_cache
[key
]
557 return self
.auth_cache
[key
]
558 user
, passwd
= self
.prompt_user_passwd(host
, realm
)
559 if user
or passwd
: self
.auth_cache
[key
] = (user
, passwd
)
562 def prompt_user_passwd(self
, host
, realm
):
563 """Override this in a GUI environment!"""
566 user
= raw_input("Enter username for %s at %s: " % (realm
,
568 passwd
= getpass
.getpass("Enter password for %s in %s at %s: " %
571 except KeyboardInterrupt:
580 """Return the IP address of the magic hostname 'localhost'."""
583 _localhost
= socket
.gethostbyname('localhost')
588 """Return the IP address of the current host."""
591 _thishost
= socket
.gethostbyname(socket
.gethostname())
596 """Return the set of errors raised by the FTP class."""
600 _ftperrors
= ftplib
.all_errors
605 """Return an empty mimetools.Message object."""
610 _noheaders
= mimetools
.Message(StringIO
.StringIO(), 0)
611 _noheaders
.fp
.close() # Recycle file descriptor
618 """Class used by open_ftp() for cache of open FTP connections."""
620 def __init__(self
, user
, passwd
, host
, port
, dirs
):
631 self
.ftp
= ftplib
.FTP()
632 self
.ftp
.connect(self
.host
, self
.port
)
633 self
.ftp
.login(self
.user
, self
.passwd
)
634 for dir in self
.dirs
:
637 def retrfile(self
, file, type):
640 if type in ('d', 'D'): cmd
= 'TYPE A'; isdir
= 1
641 else: cmd
= 'TYPE ' + type; isdir
= 0
643 self
.ftp
.voidcmd(cmd
)
644 except ftplib
.all_errors
:
646 self
.ftp
.voidcmd(cmd
)
648 if file and not isdir
:
649 # Use nlst to see if the file exists at all
652 except ftplib
.error_perm
, reason
:
653 raise IOError, ('ftp error', reason
), sys
.exc_info()[2]
654 # Restore the transfer mode!
655 self
.ftp
.voidcmd(cmd
)
656 # Try to retrieve as a file
659 conn
= self
.ftp
.ntransfercmd(cmd
)
660 except ftplib
.error_perm
, reason
:
661 if reason
[:3] != '550':
662 raise IOError, ('ftp error', reason
), sys
.exc_info()[2]
664 # Set transfer mode to ASCII!
665 self
.ftp
.voidcmd('TYPE A')
666 # Try a directory listing
667 if file: cmd
= 'LIST ' + file
669 conn
= self
.ftp
.ntransfercmd(cmd
)
671 # Pass back both a suitably decorated object and a retrieval length
672 return (addclosehook(conn
[0].makefile('rb'),
673 self
.endtransfer
), conn
[1])
674 def endtransfer(self
):
691 """Base class for addinfo and addclosehook."""
693 def __init__(self
, fp
):
695 self
.read
= self
.fp
.read
696 self
.readline
= self
.fp
.readline
697 if hasattr(self
.fp
, "readlines"): self
.readlines
= self
.fp
.readlines
698 if hasattr(self
.fp
, "fileno"): self
.fileno
= self
.fp
.fileno
701 return '<%s at %s whose fp = %s>' % (self
.__class
__.__name
__,
702 `
id(self
)`
, `self
.fp`
)
707 self
.readlines
= None
709 if self
.fp
: self
.fp
.close()
712 class addclosehook(addbase
):
713 """Class to add a close hook to an open file."""
def __init__(self, fp, closehook, *hookargs):
    """Wrap fp and remember a callable to run as a close hook.

    closehook and the tuple of extra positional hookargs are stored on
    the instance so the hook can later be applied to those arguments.
    """
    # Base-class setup for the wrapped file object comes first.
    addbase.__init__(self, fp)
    self.closehook, self.hookargs = closehook, hookargs
722 apply(self
.closehook
, self
.hookargs
)
723 self
.closehook
= None
727 class addinfo(addbase
):
728 """class to add an info() method to an open file."""
def __init__(self, fp, headers):
    """Wrap fp and attach the given headers object to the instance."""
    # Base-class setup for the wrapped file object comes first.
    addbase.__init__(self, fp)
    # Kept as-is on the instance; no copy is made.
    self.headers = headers
737 class addinfourl(addbase
):
738 """class to add info() and geturl() methods to an open file."""
740 def __init__(self
, fp
, headers
, url
):
741 addbase
.__init
__(self
, fp
)
742 self
.headers
= headers
752 def basejoin(base
, url
):
753 """Utility to combine a URL with a base URL to form a new URL."""
754 type, path
= splittype(url
)
756 # if url is complete (i.e., it contains a type), return it
758 host
, path
= splithost(path
)
759 type, basepath
= splittype(base
) # inherit type from base
761 # if url contains host, just inherit type
762 if type: return type + '://' + host
+ path
764 # no type inherited, so url must have started with //
767 host
, basepath
= splithost(basepath
) # inherit host
768 basepath
, basetag
= splittag(basepath
) # remove extraneous cruft
769 basepath
, basequery
= splitquery(basepath
) # idem
771 # non-absolute path name
772 if path
[:1] in ('#', '?'):
773 # path is just a tag or query, attach to basepath
776 # else replace last component
777 i
= string
.rfind(basepath
, '/')
779 # basepath not absolute
781 # host present, make absolute
784 # else keep non-absolute
787 # remove last file component
788 basepath
= basepath
[:i
+1]
789 # Interpret ../ (important because of symlinks)
790 while basepath
and path
[:3] == '../':
792 i
= string
.rfind(basepath
[:-1], '/')
794 basepath
= basepath
[:i
+1]
801 path
= basepath
+ path
802 if type and host
: return type + '://' + host
+ path
803 elif type: return type + ':' + path
804 elif host
: return '//' + host
+ path
# don't know what this means
808 # Utilities to parse URLs (most of these return None for missing parts):
809 # unwrap('<URL:type://host/path>') --> 'type://host/path'
810 # splittype('type:opaquestring') --> 'type', 'opaquestring'
811 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
812 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
813 # splitpasswd('user:passwd') -> 'user', 'passwd'
814 # splitport('host:port') --> 'host', 'port'
815 # splitquery('/path?query') --> '/path', 'query'
816 # splittag('/path#tag') --> '/path', 'tag'
817 # splitattr('/path;attr1=value1;attr2=value2;...') ->
818 # '/path', ['attr1=value1', 'attr2=value2', ...]
819 # splitvalue('attr=value') --> 'attr', 'value'
820 # splitgophertype('/Xselector') --> 'X', 'selector'
821 # unquote('abc%20def') -> 'abc def'
822 # quote('abc def') -> 'abc%20def'
825 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
826 url
= string
.strip(url
)
827 if url
[:1] == '<' and url
[-1:] == '>':
828 url
= string
.strip(url
[1:-1])
829 if url
[:4] == 'URL:': url
= string
.strip(url
[4:])
834 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
836 if _typeprog
is None:
838 _typeprog
= re
.compile('^([^/:]+):')
840 match
= _typeprog
.match(url
)
842 scheme
= match
.group(1)
843 return scheme
, url
[len(scheme
) + 1:]
848 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
850 if _hostprog
is None:
852 _hostprog
= re
.compile('^//([^/]*)(.*)$')
854 match
= _hostprog
.match(url
)
855 if match
: return match
.group(1, 2)
860 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
862 if _userprog
is None:
864 _userprog
= re
.compile('^([^@]*)@(.*)$')
866 match
= _userprog
.match(host
)
867 if match
: return match
.group(1, 2)
871 def splitpasswd(user
):
872 """splitpasswd('user:passwd') -> 'user', 'passwd'."""
874 if _passwdprog
is None:
876 _passwdprog
= re
.compile('^([^:]*):(.*)$')
878 match
= _passwdprog
.match(user
)
879 if match
: return match
.group(1, 2)
882 # splittag('/path#tag') --> '/path', 'tag'
885 """splitport('host:port') --> 'host', 'port'."""
887 if _portprog
is None:
889 _portprog
= re
.compile('^(.*):([0-9]+)$')
891 match
= _portprog
.match(host
)
892 if match
: return match
.group(1, 2)
896 def splitnport(host
, defport
=-1):
897 """Split host and port, returning numeric port.
898 Return given default port if no ':' found; defaults to -1.
899 Return numerical port if a valid number is found after ':'.
900 Return None if ':' but not a valid number."""
902 if _nportprog
is None:
904 _nportprog
= re
.compile('^(.*):(.*)$')
906 match
= _nportprog
.match(host
)
908 host
, port
= match
.group(1, 2)
910 if not port
: raise string
.atoi_error
, "no digits"
911 nport
= string
.atoi(port
)
912 except string
.atoi_error
:
919 """splitquery('/path?query') --> '/path', 'query'."""
921 if _queryprog
is None:
923 _queryprog
= re
.compile('^(.*)\?([^?]*)$')
925 match
= _queryprog
.match(url
)
926 if match
: return match
.group(1, 2)
931 """splittag('/path#tag') --> '/path', 'tag'."""
935 _tagprog
= re
.compile('^(.*)#([^#]*)$')
937 match
= _tagprog
.match(url
)
938 if match
: return match
.group(1, 2)
942 """splitattr('/path;attr1=value1;attr2=value2;...') ->
943 '/path', ['attr1=value1', 'attr2=value2', ...]."""
944 words
= string
.splitfields(url
, ';')
945 return words
[0], words
[1:]
948 def splitvalue(attr
):
949 """splitvalue('attr=value') --> 'attr', 'value'."""
951 if _valueprog
is None:
953 _valueprog
= re
.compile('^([^=]*)=(.*)$')
955 match
= _valueprog
.match(attr
)
956 if match
: return match
.group(1, 2)
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The type is the single character following a leading '/'.  When the
    selector does not start with '/' or has nothing after it, the result
    is (None, selector) with the selector returned untouched.
    """
    # Guard: no leading slash, or nothing after the slash to use as a type.
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    gopher_type = selector[1]
    remainder = selector[2:]
    return gopher_type, remainder
966 """unquote('abc%20def') -> 'abc def'."""
969 list = string
.split(s
, '%')
971 myappend
= res
.append
976 myappend(mychr(myatoi(item
[:2], 16))
982 return string
.join(res
, "")
986 # replace '+' with ' '
987 s
= string
.join(string
.split(s
, '+'), ' ')
990 always_safe
= string
.letters
+ string
.digits
+ '_,.-'
991 def quote(s
, safe
= '/'):
992 """quote('abc def') -> 'abc%20def'."""
993 # XXX Can speed this up an order of magnitude
994 safe
= always_safe
+ safe
996 for i
in range(len(res
)):
999 res
[i
] = '%%%02x' % ord(c
)
1000 return string
.joinfields(res
, '')
1002 def quote_plus(s
, safe
= '/'):
1003 # XXX Can speed this up an order of magnitude
1005 # replace ' ' with '+'
1006 l
= string
.split(s
, ' ')
1007 for i
in range(len(l
)):
1008 l
[i
] = quote(l
[i
], safe
)
1009 return string
.join(l
, '+')
1011 return quote(s
, safe
)
1013 def urlencode(dict):
1014 """Encode a dictionary of form entries into a URL query string."""
1016 for k
, v
in dict.items():
1017 k
= quote_plus(str(k
))
1018 v
= quote_plus(str(v
))
1019 l
.append(k
+ '=' + v
)
1020 return string
.join(l
, '&')
1024 if os
.name
== 'mac':
1026 """Return a dictionary of scheme -> proxy server URL mappings.
1028 By convention the mac uses Internet Config to store
1029 proxies. An HTTP proxy, for instance, is stored under
1044 if config
.has_key('UseHTTPProxy') and config
['UseHTTPProxy']:
1046 value
= config
['HTTPProxyHost']
1050 proxies
['http'] = 'http://%s' % value
1051 # FTP: XXXX To be done.
1052 # Gopher: XXXX To be done.
1057 """Return a dictionary of scheme -> proxy server URL mappings.
1059 Scan the environment for variables named <scheme>_proxy;
1060 this seems to be the standard convention. If you need a
1061 different way, you can pass a proxies dictionary to the
1062 [Fancy]URLopener constructor.
1066 for name
, value
in os
.environ
.items():
1067 name
= string
.lower(name
)
1068 if value
and name
[-6:] == '_proxy':
1069 proxies
[name
[:-6]] = value
1073 # Test and time quote() and unquote()
1077 for i
in range(256): s
= s
+ chr(i
)
1088 print round(t1
- t0
, 3), 'sec'
def reporthook(blocknum, blocksize, totalsize):
    """Progress callback: print one status line during remote transfers.

    Receives the block number, the block size and the total size and
    writes them to standard output as a single formatted line.
    """
    progress = (blocknum, blocksize, totalsize)
    # One parenthesised argument behaves the same as the bare print statement.
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
1101 'file://localhost/etc/passwd',
1102 'ftp://ftp.python.org/etc/passwd',
1103 ## 'gopher://gopher.micro.umn.edu/1/',
1104 'http://www.python.org/index.html',
1106 if hasattr(URLopener
, "open_https"):
1107 args
.append('https://synergy.as.cmu.edu/~geek/')
1110 print '-'*10, url
, '-'*10
1111 fn
, h
= urlretrieve(url
, None, reporthook
)
1115 for k
in h
.keys(): print k
+ ':', h
[k
]
1121 table
= string
.maketrans("", "")
1122 data
= string
.translate(data
, table
, "\r")
1132 opts
, args
= getopt
.getopt(sys
.argv
[1:], "th")
1133 except getopt
.error
, msg
:
1135 print "Use -h for help"
1142 print "Usage: python urllib.py [-t] [url ...]"
1143 print "-t runs self-test;",
1144 print "otherwise, contents of urls are printed"
1152 print "Use -h for help"
1154 print urlopen(url
).read(),
1156 # Run test program when run as a script
1157 if __name__
== '__main__':