1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
protocol. All you know is that it has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
# Public API of this module.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]
__version__ = '1.15'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Convert a URL path to a local path (POSIX: just unquote)."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a local path to a URL path (POSIX: just quote)."""
        return quote(pathname)
58 # This really consists of two pieces:
59 # (1) a class which handles opening of all sorts of URLs
60 # (plus assorted utilities etc.)
61 # (2) a set of functions for parsing URLs
62 # XXX Should these be separated out into different modules?
65 # Shortcut for basic usage
# Shared opener instance, created lazily on first use.
_urlopener = None

def urlopen(url, data=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    if data is None:
        return _urlopener.open(url)
    else:
        return _urlopener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url into filename (a temp file by default).

    Returns (filename, headers); delegates to the shared opener."""
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
88 """Class to open URLs.
89 This is a class rather than just a subroutine because we may need
90 more than one set of global protocol-specific options.
91 Note -- this is a base class for those who don't want the
92 automatic handling of errors type 302 (relocated) and 401
93 (authorization needed)."""
97 version
= "Python-urllib/%s" % __version__
100 def __init__(self
, proxies
=None, **x509
):
102 proxies
= getproxies()
103 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
104 self
.proxies
= proxies
105 self
.key_file
= x509
.get('key_file')
106 self
.cert_file
= x509
.get('cert_file')
107 self
.addheaders
= [('User-agent', self
.version
)]
108 self
.__tempfiles
= []
109 self
.__unlink
= os
.unlink
# See cleanup()
110 self
.tempcache
= None
111 # Undocumented feature: if you assign {} to tempcache,
112 # it is used to cache files retrieved with
113 # self.retrieve(). This is not enabled by default
114 # since it does not work for changing documents (and I
115 # haven't got the logic to check expiration headers
117 self
.ftpcache
= ftpcache
118 # Undocumented feature: you can use a different
119 # ftp cache by assigning to the .ftpcache member;
120 # in case you want logically independent URL openers
121 # XXX This is not threadsafe. Bah.
130 # This code sometimes runs when the rest of this module
131 # has already been deleted, so it can't use any globals
132 # or import anything.
134 for file in self
.__tempfiles
:
139 del self
.__tempfiles
[:]
141 self
.tempcache
.clear()
143 def addheader(self
, *args
):
144 """Add a header to be used by the HTTP interface only
145 e.g. u.addheader('Accept', 'sound/basic')"""
146 self
.addheaders
.append(args
)
149 def open(self
, fullurl
, data
=None):
150 """Use URLopener().open(file) instead of open(file, 'r')."""
151 fullurl
= unwrap(toBytes(fullurl
))
152 if self
.tempcache
and self
.tempcache
.has_key(fullurl
):
153 filename
, headers
= self
.tempcache
[fullurl
]
154 fp
= open(filename
, 'rb')
155 return addinfourl(fp
, headers
, fullurl
)
156 urltype
, url
= splittype(fullurl
)
159 if self
.proxies
.has_key(urltype
):
160 proxy
= self
.proxies
[urltype
]
161 urltype
, proxyhost
= splittype(proxy
)
162 host
, selector
= splithost(proxyhost
)
163 url
= (host
, fullurl
) # Signal special case to open_*()
166 name
= 'open_' + urltype
170 name
= '_'.join(name
.split('-'))
171 if not hasattr(self
, name
):
173 return self
.open_unknown_proxy(proxy
, fullurl
, data
)
175 return self
.open_unknown(fullurl
, data
)
178 return getattr(self
, name
)(url
)
180 return getattr(self
, name
)(url
, data
)
181 except socket
.error
, msg
:
182 raise IOError, ('socket error', msg
), sys
.exc_info()[2]
184 def open_unknown(self
, fullurl
, data
=None):
185 """Overridable interface to open unknown URL type."""
186 type, url
= splittype(fullurl
)
187 raise IOError, ('url error', 'unknown url type', type)
189 def open_unknown_proxy(self
, proxy
, fullurl
, data
=None):
190 """Overridable interface to open unknown URL type."""
191 type, url
= splittype(fullurl
)
192 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy
)
195 def retrieve(self
, url
, filename
=None, reporthook
=None, data
=None):
196 """retrieve(url) returns (filename, None) for a local object
197 or (tempfilename, headers) for a remote object."""
198 url
= unwrap(toBytes(url
))
199 if self
.tempcache
and self
.tempcache
.has_key(url
):
200 return self
.tempcache
[url
]
201 type, url1
= splittype(url
)
202 if not filename
and (not type or type == 'file'):
204 fp
= self
.open_local_file(url1
)
207 return url2pathname(splithost(url1
)[1]), hdrs
210 fp
= self
.open(url
, data
)
214 garbage
, path
= splittype(url
)
215 garbage
, path
= splithost(path
or "")
216 path
, garbage
= splitquery(path
or "")
217 path
, garbage
= splitattr(path
or "")
218 suffix
= os
.path
.splitext(path
)[1]
219 filename
= tempfile
.mktemp(suffix
)
220 self
.__tempfiles
.append(filename
)
221 result
= filename
, headers
222 if self
.tempcache
is not None:
223 self
.tempcache
[url
] = result
224 tfp
= open(filename
, 'wb')
229 if headers
.has_key("content-length"):
230 size
= int(headers
["Content-Length"])
231 reporthook(0, bs
, size
)
234 reporthook(1, bs
, size
)
238 blocknum
= blocknum
+ 1
240 reporthook(blocknum
, bs
, size
)
247 # Each method named open_<type> knows how to open that type of URL
249 def open_http(self
, url
, data
=None):
250 """Use HTTP protocol."""
253 if type(url
) is types
.StringType
:
254 host
, selector
= splithost(url
)
256 user_passwd
, host
= splituser(host
)
261 urltype
, rest
= splittype(selector
)
264 if urltype
.lower() != 'http':
267 realhost
, rest
= splithost(rest
)
269 user_passwd
, realhost
= splituser(realhost
)
271 selector
= "%s://%s%s" % (urltype
, realhost
, rest
)
272 if proxy_bypass(realhost
):
275 #print "proxy via http:", host, selector
276 if not host
: raise IOError, ('http error', 'no host given')
279 auth
= base64
.encodestring(user_passwd
).strip()
282 h
= httplib
.HTTP(host
)
284 h
.putrequest('POST', selector
)
285 h
.putheader('Content-type', 'application/x-www-form-urlencoded')
286 h
.putheader('Content-length', '%d' % len(data
))
288 h
.putrequest('GET', selector
)
289 if auth
: h
.putheader('Authorization', 'Basic %s' % auth
)
290 if realhost
: h
.putheader('Host', realhost
)
291 for args
in self
.addheaders
: apply(h
.putheader
, args
)
295 errcode
, errmsg
, headers
= h
.getreply()
298 return addinfourl(fp
, headers
, "http:" + url
)
301 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
)
303 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
, data
)
305 def http_error(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
306 """Handle http errors.
307 Derived class can override this, or provide specific handlers
308 named http_error_DDD where DDD is the 3-digit error code."""
309 # First check if there's a specific handler for this error
310 name
= 'http_error_%d' % errcode
311 if hasattr(self
, name
):
312 method
= getattr(self
, name
)
314 result
= method(url
, fp
, errcode
, errmsg
, headers
)
316 result
= method(url
, fp
, errcode
, errmsg
, headers
, data
)
317 if result
: return result
318 return self
.http_error_default(url
, fp
, errcode
, errmsg
, headers
)
320 def http_error_default(self
, url
, fp
, errcode
, errmsg
, headers
):
321 """Default error handler: close the connection and raise IOError."""
324 raise IOError, ('http error', errcode
, errmsg
, headers
)
326 if hasattr(socket
, "ssl"):
327 def open_https(self
, url
, data
=None):
328 """Use HTTPS protocol."""
331 if type(url
) is types
.StringType
:
332 host
, selector
= splithost(url
)
334 user_passwd
, host
= splituser(host
)
339 urltype
, rest
= splittype(selector
)
342 if urltype
.lower() != 'https':
345 realhost
, rest
= splithost(rest
)
347 user_passwd
, realhost
= splituser(realhost
)
349 selector
= "%s://%s%s" % (urltype
, realhost
, rest
)
350 #print "proxy via https:", host, selector
351 if not host
: raise IOError, ('https error', 'no host given')
354 auth
= base64
.encodestring(user_passwd
).strip()
357 h
= httplib
.HTTPS(host
, 0,
358 key_file
=self
.key_file
,
359 cert_file
=self
.cert_file
)
361 h
.putrequest('POST', selector
)
362 h
.putheader('Content-type',
363 'application/x-www-form-urlencoded')
364 h
.putheader('Content-length', '%d' % len(data
))
366 h
.putrequest('GET', selector
)
367 if auth
: h
.putheader('Authorization: Basic %s' % auth
)
368 if realhost
: h
.putheader('Host', realhost
)
369 for args
in self
.addheaders
: apply(h
.putheader
, args
)
373 errcode
, errmsg
, headers
= h
.getreply()
376 return addinfourl(fp
, headers
, "https:" + url
)
379 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
)
381 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
,
384 def open_gopher(self
, url
):
385 """Use Gopher protocol."""
387 host
, selector
= splithost(url
)
388 if not host
: raise IOError, ('gopher error', 'no host given')
390 type, selector
= splitgophertype(selector
)
391 selector
, query
= splitquery(selector
)
392 selector
= unquote(selector
)
394 query
= unquote(query
)
395 fp
= gopherlib
.send_query(selector
, query
, host
)
397 fp
= gopherlib
.send_selector(selector
, host
)
398 return addinfourl(fp
, noheaders(), "gopher:" + url
)
400 def open_file(self
, url
):
401 """Use local file or FTP depending on form of URL."""
402 if url
[:2] == '//' and url
[2:3] != '/':
403 return self
.open_ftp(url
)
405 return self
.open_local_file(url
)
407 def open_local_file(self
, url
):
408 """Use local file."""
409 import mimetypes
, mimetools
, rfc822
, StringIO
410 host
, file = splithost(url
)
411 localname
= url2pathname(file)
412 stats
= os
.stat(localname
)
413 size
= stats
[stat
.ST_SIZE
]
414 modified
= rfc822
.formatdate(stats
[stat
.ST_MTIME
])
415 mtype
= mimetypes
.guess_type(url
)[0]
416 headers
= mimetools
.Message(StringIO
.StringIO(
417 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
418 (mtype
or 'text/plain', size
, modified
)))
422 urlfile
= 'file://' + file
423 return addinfourl(open(localname
, 'rb'),
425 host
, port
= splitport(host
)
427 and socket
.gethostbyname(host
) in (localhost(), thishost()):
430 urlfile
= 'file://' + file
431 return addinfourl(open(localname
, 'rb'),
433 raise IOError, ('local file error', 'not on local host')
435 def open_ftp(self
, url
):
436 """Use FTP protocol."""
437 import mimetypes
, mimetools
, StringIO
438 host
, path
= splithost(url
)
439 if not host
: raise IOError, ('ftp error', 'no host given')
440 host
, port
= splitport(host
)
441 user
, host
= splituser(host
)
442 if user
: user
, passwd
= splitpasswd(user
)
445 user
= unquote(user
or '')
446 passwd
= unquote(passwd
or '')
447 host
= socket
.gethostbyname(host
)
450 port
= ftplib
.FTP_PORT
453 path
, attrs
= splitattr(path
)
455 dirs
= path
.split('/')
456 dirs
, file = dirs
[:-1], dirs
[-1]
457 if dirs
and not dirs
[0]: dirs
= dirs
[1:]
458 if dirs
and not dirs
[0]: dirs
[0] = '/'
459 key
= user
, host
, port
, '/'.join(dirs
)
461 if len(self
.ftpcache
) > MAXFTPCACHE
:
462 # Prune the cache, rather arbitrarily
463 for k
in self
.ftpcache
.keys():
469 if not self
.ftpcache
.has_key(key
):
470 self
.ftpcache
[key
] = \
471 ftpwrapper(user
, passwd
, host
, port
, dirs
)
472 if not file: type = 'D'
475 attr
, value
= splitvalue(attr
)
476 if attr
.lower() == 'type' and \
477 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
479 (fp
, retrlen
) = self
.ftpcache
[key
].retrfile(file, type)
480 mtype
= mimetypes
.guess_type("ftp:" + url
)[0]
483 headers
+= "Content-Type: %s\n" % mtype
484 if retrlen
is not None and retrlen
>= 0:
485 headers
+= "Content-Length: %d\n" % retrlen
486 headers
= mimetools
.Message(StringIO
.StringIO(headers
))
487 return addinfourl(fp
, headers
, "ftp:" + url
)
488 except ftperrors(), msg
:
489 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
491 def open_data(self
, url
, data
=None):
492 """Use "data" URL."""
495 # syntax of data URLs:
496 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
497 # mediatype := [ type "/" subtype ] *( ";" parameter )
499 # parameter := attribute "=" value
500 import StringIO
, mimetools
, time
502 [type, data
] = url
.split(',', 1)
504 raise IOError, ('data error', 'bad data URL')
506 type = 'text/plain;charset=US-ASCII'
507 semi
= type.rfind(';')
508 if semi
>= 0 and '=' not in type[semi
:]:
509 encoding
= type[semi
+1:]
514 msg
.append('Date: %s'%time
.strftime('%a, %d %b %Y %T GMT',
515 time
.gmtime(time
.time())))
516 msg
.append('Content-type: %s' % type)
517 if encoding
== 'base64':
519 data
= base64
.decodestring(data
)
522 msg
.append('Content-length: %d' % len(data
))
526 f
= StringIO
.StringIO(msg
)
527 headers
= mimetools
.Message(f
, 0)
528 f
.fileno
= None # needed for addinfourl
529 return addinfourl(f
, headers
, url
)
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args):
        apply(URLopener.__init__, (self,) + args)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10
541 def http_error_default(self
, url
, fp
, errcode
, errmsg
, headers
):
542 """Default error handling -- don't raise an exception."""
543 return addinfourl(fp
, headers
, "http:" + url
)
545 def http_error_302(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
546 """Error 302 -- relocated (temporarily)."""
548 if self
.maxtries
and self
.tries
>= self
.maxtries
:
549 if hasattr(self
, "http_error_500"):
550 meth
= self
.http_error_500
552 meth
= self
.http_error_default
554 return meth(url
, fp
, 500,
555 "Internal Server Error: Redirect Recursion", headers
)
556 result
= self
.redirect_internal(url
, fp
, errcode
, errmsg
, headers
,
561 def redirect_internal(self
, url
, fp
, errcode
, errmsg
, headers
, data
):
562 if headers
.has_key('location'):
563 newurl
= headers
['location']
564 elif headers
.has_key('uri'):
565 newurl
= headers
['uri']
570 # In case the server sent a relative URL, join with original:
571 newurl
= basejoin(self
.type + ":" + url
, newurl
)
573 return self
.open(newurl
)
575 return self
.open(newurl
, data
)
577 def http_error_301(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
578 """Error 301 -- also relocated (permanently)."""
579 return self
.http_error_302(url
, fp
, errcode
, errmsg
, headers
, data
)
581 def http_error_401(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
582 """Error 401 -- authentication required.
583 See this URL for a description of the basic authentication scheme:
584 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
585 if not headers
.has_key('www-authenticate'):
586 URLopener
.http_error_default(self
, url
, fp
,
587 errcode
, errmsg
, headers
)
588 stuff
= headers
['www-authenticate']
590 match
= re
.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff
)
592 URLopener
.http_error_default(self
, url
, fp
,
593 errcode
, errmsg
, headers
)
594 scheme
, realm
= match
.groups()
595 if scheme
.lower() != 'basic':
596 URLopener
.http_error_default(self
, url
, fp
,
597 errcode
, errmsg
, headers
)
598 name
= 'retry_' + self
.type + '_basic_auth'
600 return getattr(self
,name
)(url
, realm
)
602 return getattr(self
,name
)(url
, realm
, data
)
604 def retry_http_basic_auth(self
, url
, realm
, data
=None):
605 host
, selector
= splithost(url
)
606 i
= host
.find('@') + 1
608 user
, passwd
= self
.get_user_passwd(host
, realm
, i
)
609 if not (user
or passwd
): return None
610 host
= quote(user
, safe
='') + ':' + quote(passwd
, safe
='') + '@' + host
611 newurl
= 'http://' + host
+ selector
613 return self
.open(newurl
)
615 return self
.open(newurl
, data
)
617 def retry_https_basic_auth(self
, url
, realm
, data
=None):
618 host
, selector
= splithost(url
)
619 i
= host
.find('@') + 1
621 user
, passwd
= self
.get_user_passwd(host
, realm
, i
)
622 if not (user
or passwd
): return None
623 host
= quote(user
, safe
='') + ':' + quote(passwd
, safe
='') + '@' + host
624 newurl
= '//' + host
+ selector
625 return self
.open_https(newurl
, data
)
627 def get_user_passwd(self
, host
, realm
, clear_cache
= 0):
628 key
= realm
+ '@' + host
.lower()
629 if self
.auth_cache
.has_key(key
):
631 del self
.auth_cache
[key
]
633 return self
.auth_cache
[key
]
634 user
, passwd
= self
.prompt_user_passwd(host
, realm
)
635 if user
or passwd
: self
.auth_cache
[key
] = (user
, passwd
)
638 def prompt_user_passwd(self
, host
, realm
):
639 """Override this in a GUI environment!"""
642 user
= raw_input("Enter username for %s at %s: " % (realm
,
644 passwd
= getpass
.getpass("Enter password for %s in %s at %s: " %
647 except KeyboardInterrupt:
656 """Return the IP address of the magic hostname 'localhost'."""
659 _localhost
= socket
.gethostbyname('localhost')
664 """Return the IP address of the current host."""
667 _thishost
= socket
.gethostbyname(socket
.gethostname())
672 """Return the set of errors raised by the FTP class."""
676 _ftperrors
= ftplib
.all_errors
681 """Return an empty mimetools.Message object."""
686 _noheaders
= mimetools
.Message(StringIO
.StringIO(), 0)
687 _noheaders
.fp
.close() # Recycle file descriptor
694 """Class used by open_ftp() for cache of open FTP connections."""
696 def __init__(self
, user
, passwd
, host
, port
, dirs
):
707 self
.ftp
= ftplib
.FTP()
708 self
.ftp
.connect(self
.host
, self
.port
)
709 self
.ftp
.login(self
.user
, self
.passwd
)
710 for dir in self
.dirs
:
713 def retrfile(self
, file, type):
716 if type in ('d', 'D'): cmd
= 'TYPE A'; isdir
= 1
717 else: cmd
= 'TYPE ' + type; isdir
= 0
719 self
.ftp
.voidcmd(cmd
)
720 except ftplib
.all_errors
:
722 self
.ftp
.voidcmd(cmd
)
724 if file and not isdir
:
725 # Use nlst to see if the file exists at all
728 except ftplib
.error_perm
, reason
:
729 raise IOError, ('ftp error', reason
), sys
.exc_info()[2]
730 # Restore the transfer mode!
731 self
.ftp
.voidcmd(cmd
)
732 # Try to retrieve as a file
735 conn
= self
.ftp
.ntransfercmd(cmd
)
736 except ftplib
.error_perm
, reason
:
737 if str(reason
)[:3] != '550':
738 raise IOError, ('ftp error', reason
), sys
.exc_info()[2]
740 # Set transfer mode to ASCII!
741 self
.ftp
.voidcmd('TYPE A')
742 # Try a directory listing
743 if file: cmd
= 'LIST ' + file
745 conn
= self
.ftp
.ntransfercmd(cmd
)
747 # Pass back both a suitably decorated object and a retrieval length
748 return (addclosehook(conn
[0].makefile('rb'),
749 self
.endtransfer
), conn
[1])
750 def endtransfer(self
):
767 """Base class for addinfo and addclosehook."""
769 def __init__(self
, fp
):
771 self
.read
= self
.fp
.read
772 self
.readline
= self
.fp
.readline
773 if hasattr(self
.fp
, "readlines"): self
.readlines
= self
.fp
.readlines
774 if hasattr(self
.fp
, "fileno"): self
.fileno
= self
.fp
.fileno
777 return '<%s at %s whose fp = %s>' % (self
.__class
__.__name
__,
778 `
id(self
)`
, `self
.fp`
)
783 self
.readlines
= None
785 if self
.fp
: self
.fp
.close()
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        # Invoke the hook exactly once, then drop the references.
        if self.closehook:
            apply(self.closehook, self.hookargs)
            self.closehook = None
            self.hookargs = None
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        return self.headers
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        return self.headers

    def geturl(self):
        return self.url
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL."""
    type, path = splittype(url)
    if type:
        # if url is complete (i.e., it contains a type), return it
        return url
    host, path = splithost(path)
    type, basepath = splittype(base) # inherit type from base
    if host:
        # if url contains host, just inherit type
        if type: return type + '://' + host + path
        else:
            # no type inherited, so url must have started with //
            # just return it
            return url
    host, basepath = splithost(basepath) # inherit host
    basepath, basetag = splittag(basepath) # remove extraneous cruft
    basepath, basequery = splitquery(basepath) # idem
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            i = len(basepath)
        else:
            # else replace last component
            i = basepath.rfind('/')
        if i < 0:
            # basepath not absolute
            if host:
                # host present, make absolute
                basepath = '/'
            else:
                # else keep non-absolute
                basepath = ''
        else:
            # remove last file component
            basepath = basepath[:i+1]
        # Interpret ../ (important because of symlinks)
        while basepath and path[:3] == '../':
            path = path[3:]
            i = basepath[:-1].rfind('/')
            if i > 0:
                basepath = basepath[:i+1]
            elif i == 0:
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    if host and path and path[0] != '/':
        path = '/' + path
    if type and host: return type + '://' + host + path
    elif type: return type + ':' + path
    elif host: return '//' + host + path # don't know what this means
    else: return path
886 # Utilities to parse URLs (most of these return None for missing parts):
887 # unwrap('<URL:type://host/path>') --> 'type://host/path'
888 # splittype('type:opaquestring') --> 'type', 'opaquestring'
889 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
890 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
891 # splitpasswd('user:passwd') -> 'user', 'passwd'
892 # splitport('host:port') --> 'host', 'port'
893 # splitquery('/path?query') --> '/path', 'query'
894 # splittag('/path#tag') --> '/path', 'tag'
895 # splitattr('/path;attr1=value1;attr2=value2;...') ->
896 # '/path', ['attr1=value1', 'attr2=value2', ...]
897 # splitvalue('attr=value') --> 'attr', 'value'
898 # splitgophertype('/Xselector') --> 'X', 'selector'
899 # unquote('abc%20def') -> 'abc def'
900 # quote('abc def') -> 'abc%20def')
903 """toBytes(u"URL") --> 'URL'."""
904 # Most URL schemes require ASCII. If that changes, the conversion
906 if type(url
) is types
.UnicodeType
:
908 url
= url
.encode("ASCII")
910 raise UnicodeError("URL " + repr(url
) +
911 " contains non-ASCII characters")
915 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
917 if url
[:1] == '<' and url
[-1:] == '>':
918 url
= url
[1:-1].strip()
919 if url
[:4] == 'URL:': url
= url
[4:].strip()
924 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
926 if _typeprog
is None:
928 _typeprog
= re
.compile('^([^/:]+):')
930 match
= _typeprog
.match(url
)
932 scheme
= match
.group(1)
933 return scheme
.lower(), url
[len(scheme
) + 1:]
938 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
940 if _hostprog
is None:
942 _hostprog
= re
.compile('^//([^/]*)(.*)$')
944 match
= _hostprog
.match(url
)
945 if match
: return match
.group(1, 2)
950 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
952 if _userprog
is None:
954 _userprog
= re
.compile('^([^@]*)@(.*)$')
956 match
= _userprog
.match(host
)
957 if match
: return map(unquote
, match
.group(1, 2))
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None
972 # splittag('/path#tag') --> '/path', 'tag'
975 """splitport('host:port') --> 'host', 'port'."""
977 if _portprog
is None:
979 _portprog
= re
.compile('^(.*):([0-9]+)$')
981 match
= _portprog
.match(host
)
982 if match
: return match
.group(1, 2)
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        try:
            if not port: raise ValueError("no digits")
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
1021 """splittag('/path#tag') --> '/path', 'tag'."""
1023 if _tagprog
is None:
1025 _tagprog
= re
.compile('^(.*)#([^#]*)$')
1027 match
= _tagprog
.match(url
)
1028 if match
: return match
.group(1, 2)
1032 """splitattr('/path;attr1=value1;attr2=value2;...') ->
1033 '/path', ['attr1=value1', 'attr2=value2', ...]."""
1034 words
= url
.split(';')
1035 return words
[0], words
[1:]
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
    return attr, None
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'."""
    if selector[:1] == '/' and selector[1:2]:
        return selector[1], selector[2:]
    return None, selector
1056 """unquote('abc%20def') -> 'abc def'."""
1061 myappend
= res
.append
1066 myappend(mychr(myatoi(item
[:2], 16))
1069 myappend('%' + item
)
1071 myappend('%' + item
)
def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # replace '+' with ' '
    s = ' '.join(s.split('+'))
    return unquote(s)
# Characters never quoted by quote()/quote_plus().
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# Safe set served by the cached fast path (_fast_quote).
_fast_safe_test = always_safe + '/'
_fast_safe = None

def _fast_quote(s):
    """Quote s using the precomputed default safe set."""
    global _fast_safe
    if _fast_safe is None:
        _fast_safe = {}
        for c in _fast_safe_test:
            _fast_safe[c] = 1
    res = list(s)
    for i in range(len(res)):
        c = res[i]
        if not _fast_safe.has_key(c):
            res[i] = '%%%02X' % ord(c)
    return ''.join(res)
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    safe = always_safe + safe
    if _fast_safe_test == safe:
        return _fast_quote(s)
    res = list(s)
    for i in range(len(res)):
        c = res[i]
        if c not in safe:
            res[i] = '%%%02X' % ord(c)
    return ''.join(res)
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' in s:
        l = s.split(' ')
        for i in range(len(l)):
            l[i] = quote(l[i], safe)
        return '+'.join(l)
    else:
        return quote(s, safe)
1142 def urlencode(query
,doseq
=0):
1143 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1145 If any values in the query arg are sequences and doseq is true, each
1146 sequence element is converted to a separate parameter.
1148 If the query arg is a sequence of two-element tuples, the order of the
1149 parameters in the output will match the order of parameters in the
1153 if hasattr(query
,"items"):
1155 query
= query
.items()
1157 # it's a bother at times that strings and string-like objects are
1160 # non-sequence items should not work with len()
1162 # non-empty strings will fail this
1163 if len(query
) and type(query
[0]) != types
.TupleType
:
1165 # zero-length sequences of all types will get here and succeed,
1166 # but that's a minor nit - since the original implementation
1167 # allowed empty dicts that type of behavior probably should be
1168 # preserved for consistency
1170 ty
,va
,tb
= sys
.exc_info()
1171 raise TypeError, "not a valid non-string sequence or mapping object", tb
1175 # preserve old behavior
1177 k
= quote_plus(str(k
))
1178 v
= quote_plus(str(v
))
1179 l
.append(k
+ '=' + v
)
1182 k
= quote_plus(str(k
))
1183 if type(v
) == types
.StringType
:
1185 l
.append(k
+ '=' + v
)
1186 elif type(v
) == types
.UnicodeType
:
1187 # is there a reasonable way to convert to ASCII?
1188 # encode generates a string, but "replace" or "ignore"
1189 # lose information and "strict" can raise UnicodeError
1190 v
= quote_plus(v
.encode("ASCII","replace"))
1191 l
.append(k
+ '=' + v
)
1194 # is this a sufficient test for sequence-ness?
1198 v
= quote_plus(str(v
))
1199 l
.append(k
+ '=' + v
)
1201 # loop over the sequence
1203 l
.append(k
+ '=' + quote_plus(str(elt
)))
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    return proxies
1223 if os
.name
== 'mac':
1225 """Return a dictionary of scheme -> proxy server URL mappings.
1227 By convention the mac uses Internet Config to store
1228 proxies. An HTTP proxy, for instance, is stored under
1243 if config
.has_key('UseHTTPProxy') and config
['UseHTTPProxy']:
1245 value
= config
['HTTPProxyHost']
1249 proxies
['http'] = 'http://%s' % value
1250 # FTP: XXXX To be done.
1251 # Gopher: XXXX To be done.
1254 def proxy_bypass(x
):
1257 elif os
.name
== 'nt':
1258 def getproxies_registry():
1259 """Return a dictionary of scheme -> proxy server URL mappings.
1261 Win32 uses the registry to store proxies.
1268 # Std module, so should be around - but you never know!
1271 internetSettings
= _winreg
.OpenKey(_winreg
.HKEY_CURRENT_USER
,
1272 r
'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1273 proxyEnable
= _winreg
.QueryValueEx(internetSettings
,
1276 # Returned as Unicode but problems if not converted to ASCII
1277 proxyServer
= str(_winreg
.QueryValueEx(internetSettings
,
1279 if '=' in proxyServer
:
1280 # Per-protocol settings
1281 for p
in proxyServer
.split(';'):
1282 protocol
, address
= p
.split('=', 1)
1283 proxies
[protocol
] = '%s://%s' % (protocol
, address
)
1285 # Use one setting for all protocols
1286 if proxyServer
[:5] == 'http:':
1287 proxies
['http'] = proxyServer
1289 proxies
['http'] = 'http://%s' % proxyServer
1290 proxies
['ftp'] = 'ftp://%s' % proxyServer
1291 internetSettings
.Close()
1292 except (WindowsError, ValueError, TypeError):
1293 # Either registry key not found etc, or the value in an
1294 # unexpected format.
1295 # proxies already set up to be empty so nothing to do
1300 """Return a dictionary of scheme -> proxy server URL mappings.
1302 Returns settings gathered from the environment, if specified,
1306 return getproxies_environment() or getproxies_registry()
1308 def proxy_bypass(host
):
1314 # Std modules, so should be around - but you never know!
1317 internetSettings
= _winreg
.OpenKey(_winreg
.HKEY_CURRENT_USER
,
1318 r
'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1319 proxyEnable
= _winreg
.QueryValueEx(internetSettings
,
1321 proxyOverride
= str(_winreg
.QueryValueEx(internetSettings
,
1322 'ProxyOverride')[0])
1323 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1324 except WindowsError:
1326 if not proxyEnable
or not proxyOverride
:
1328 # try to make a host list from name and IP address.
1331 addr
= socket
.gethostbyname(host
[0])
1334 except socket
.error
:
1336 # make a check value list from the registry entry: replace the
1337 # '<local>' string by the localhost entry and the corresponding
1339 proxyOverride
= proxyOverride
.split(';')
1341 while i
< len(proxyOverride
):
1342 if proxyOverride
[i
] == '<local>':
1343 proxyOverride
[i
:i
+1] = ['localhost',
1345 socket
.gethostname(),
1346 socket
.gethostbyname(
1347 socket
.gethostname())]
1349 # print proxyOverride
1350 # now check if we match one of the registry values.
1351 for test
in proxyOverride
:
1352 test
= test
.replace(".", r
"\.") # mask dots
1353 test
= test
.replace("*", r
".*") # change glob sequence
1354 test
= test
.replace("?", r
".") # change glob char
1356 # print "%s <--> %s" %( test, val )
1357 if re
.match(test
, val
, re
.I
):
1362 # By default use environment variables
1363 getproxies
= getproxies_environment
1365 def proxy_bypass(host
):
1368 # Test and time quote() and unquote()
1372 for i
in range(256): s
= s
+ chr(i
)
1383 print round(t1
- t0
, 3), 'sec'
1386 def reporthook(blocknum
, blocksize
, totalsize
):
1387 # Report during remote transfers
1388 print "Block number: %d, Block size: %d, Total size: %d" % (
1389 blocknum
, blocksize
, totalsize
)
1397 'file://localhost/etc/passwd',
1398 'ftp://ftp.python.org/etc/passwd',
1399 ## 'gopher://gopher.micro.umn.edu/1/',
1400 'http://www.python.org/index.html',
1402 if hasattr(URLopener
, "open_https"):
1403 args
.append('https://synergy.as.cmu.edu/~geek/')
1406 print '-'*10, url
, '-'*10
1407 fn
, h
= urlretrieve(url
, None, reporthook
)
1411 for k
in h
.keys(): print k
+ ':', h
[k
]
1417 table
= string
.maketrans("", "")
1418 data
= data
.translate(table
, "\r")
1428 opts
, args
= getopt
.getopt(sys
.argv
[1:], "th")
1429 except getopt
.error
, msg
:
1431 print "Use -h for help"
1438 print "Usage: python urllib.py [-t] [url ...]"
1439 print "-t runs self-test;",
1440 print "otherwise, contents of urls are printed"
1448 print "Use -h for help"
1450 print urlopen(url
).read(),
1452 # Run test program when run as a script
1453 if __name__
== '__main__':