Lib/urllib.py

   1 """Open an arbitrary URL.
   2
   3 See the following document for more info on URLs:
   4 "Names and Addresses, URIs, URLs, URNs, URCs", at
   5 http://www.w3.org/pub/WWW/Addressing/Overview.html
   6
   7 See also the HTTP spec (from which the error codes are derived):
   8 "HTTP - Hypertext Transfer Protocol", at
   9 http://www.w3.org/pub/WWW/Protocols/
  10
  11 Related standards and specs:
  12 - RFC1808: the "relative URL" spec. (authoritative status)
  13 - RFC1738 - the "URL standard". (authoritative status)
  14 - RFC1630 - the "URI spec". (informational status)
  15
  16 The object returned by URLopener().open(file) will differ per
protocol.  All you know is that it has methods read(), readline(),
  18 readlines(), fileno(), close() and info().  The read*(), fileno()
  19 and close() methods work like those of open files.
  20 The info() method returns a mimetools.Message object which can be
  21 used to query various info about the object, if available.
  22 (mimetools.Message objects are queried with the getheader() method.)
  23 """
  24
  25 import string
  26 import socket
  27 import os
  28 import time
  29 import sys
  30
# Public API: the names bound by "from urllib import *".
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]

# Interpolated into URLopener.version, which URLopener sends as the
# default User-agent header.
__version__ = '1.15'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
  42
# Helper for non-unix systems: pick the module that converts between the
# path part of a URL and a local file name for this platform.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # Everywhere else the conversion is delegated to unquote()/quote().
    def url2pathname(pathname):
        """Convert the path part of a URL to a local path via unquote()."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Convert a local path to the path part of a URL via quote()."""
        return quote(pathname)
  55
  56 # This really consists of two pieces:
  57 # (1) a class which handles opening of all sorts of URLs
  58 #     (plus assorted utilities etc.)
  59 # (2) a set of functions for parsing URLs
  60 # XXX Should these be separated out into different modules?
  61
  62
  63 # Shortcut for basic usage
  64 _urlopener = None
  65 def urlopen(url, data=None, proxies=None):
  66     """urlopen(url [, data]) -> open file-like object"""
  67     global _urlopener
  68     if proxies is not None:
  69         opener = FancyURLopener(proxies=proxies)
  70     elif not _urlopener:
  71         opener = FancyURLopener()
  72         _urlopener = opener
  73     else:
  74         opener = _urlopener
  75     if data is None:
  76         return opener.open(url)
  77     else:
  78         return opener.open(url, data)
  79 def urlretrieve(url, filename=None, reporthook=None, data=None):
  80     global _urlopener
  81     if not _urlopener:
  82         _urlopener = FancyURLopener()
  83     return _urlopener.retrieve(url, filename, reporthook, data)
  84 def urlcleanup():
  85     if _urlopener:
  86         _urlopener.cleanup()
  87
  88
  89 ftpcache = {}
  90 class URLopener:
  91     """Class to open URLs.
  92     This is a class rather than just a subroutine because we may need
  93     more than one set of global protocol-specific options.
  94     Note -- this is a base class for those who don't want the
  95     automatic handling of errors type 302 (relocated) and 401
  96     (authorization needed)."""
  97
  98     __tempfiles = None
  99
 100     version = "Python-urllib/%s" % __version__
 101
 102     # Constructor
 103     def __init__(self, proxies=None, **x509):
 104         if proxies is None:
 105             proxies = getproxies()
 106         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
 107         self.proxies = proxies
 108         self.key_file = x509.get('key_file')
 109         self.cert_file = x509.get('cert_file')
 110         self.addheaders = [('User-agent', self.version)]
 111         self.__tempfiles = []
 112         self.__unlink = os.unlink # See cleanup()
 113         self.tempcache = None
 114         # Undocumented feature: if you assign {} to tempcache,
 115         # it is used to cache files retrieved with
 116         # self.retrieve().  This is not enabled by default
 117         # since it does not work for changing documents (and I
 118         # haven't got the logic to check expiration headers
 119         # yet).
 120         self.ftpcache = ftpcache
 121         # Undocumented feature: you can use a different
 122         # ftp cache by assigning to the .ftpcache member;
 123         # in case you want logically independent URL openers
 124         # XXX This is not threadsafe.  Bah.
 125
    def __del__(self):
        """Finalizer: run close() when the opener object is destroyed."""
        self.close()
 128
    def close(self):
        """Release the opener's resources; this simply calls cleanup()."""
        self.cleanup()
 131
    def cleanup(self):
        """Delete the recorded temporary files and empty tempcache.

        A file that cannot be removed (OSError) is silently skipped.
        """
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    # Instance-held reference to os.unlink, see __init__().
                    self.__unlink(file)
                except OSError:
                    pass
            # Empty the list in place so the same list object is reused.
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()
 145
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')

        The arguments are stored as one tuple in self.addheaders; the
        HTTP code later passes each tuple to putheader(*args)."""
        self.addheaders.append(args)
 150
 151     # External interface
 152     def open(self, fullurl, data=None):
 153         """Use URLopener().open(file) instead of open(file, 'r')."""
 154         fullurl = unwrap(toBytes(fullurl))
 155         if self.tempcache and fullurl in self.tempcache:
 156             filename, headers = self.tempcache[fullurl]
 157             fp = open(filename, 'rb')
 158             return addinfourl(fp, headers, fullurl)
 159         urltype, url = splittype(fullurl)
 160         if not urltype:
 161             urltype = 'file'
 162         if urltype in self.proxies:
 163             proxy = self.proxies[urltype]
 164             urltype, proxyhost = splittype(proxy)
 165             host, selector = splithost(proxyhost)
 166             url = (host, fullurl) # Signal special case to open_*()
 167         else:
 168             proxy = None
 169         name = 'open_' + urltype
 170         self.type = urltype
 171         if '-' in name:
 172             # replace - with _
 173             name = '_'.join(name.split('-'))
 174         if not hasattr(self, name):
 175             if proxy:
 176                 return self.open_unknown_proxy(proxy, fullurl, data)
 177             else:
 178                 return self.open_unknown(fullurl, data)
 179         try:
 180             if data is None:
 181                 return getattr(self, name)(url)
 182             else:
 183                 return getattr(self, name)(url, data)
 184         except socket.error, msg:
 185             raise IOError, ('socket error', msg), sys.exc_info()[2]
 186
 187     def open_unknown(self, fullurl, data=None):
 188         """Overridable interface to open unknown URL type."""
 189         type, url = splittype(fullurl)
 190         raise IOError, ('url error', 'unknown url type', type)
 191
 192     def open_unknown_proxy(self, proxy, fullurl, data=None):
 193         """Overridable interface to open unknown URL type."""
 194         type, url = splittype(fullurl)
 195         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
 196
 197     # External interface
 198     def retrieve(self, url, filename=None, reporthook=None, data=None):
 199         """retrieve(url) returns (filename, headers) for a local object
 200         or (tempfilename, headers) for a remote object."""
 201         url = unwrap(toBytes(url))
 202         if self.tempcache and url in self.tempcache:
 203             return self.tempcache[url]
 204         type, url1 = splittype(url)
 205         if filename is None and (not type or type == 'file'):
 206             try:
 207                 fp = self.open_local_file(url1)
 208                 hdrs = fp.info()
 209                 del fp
 210                 return url2pathname(splithost(url1)[1]), hdrs
 211             except IOError, msg:
 212                 pass
 213         fp = self.open(url, data)
 214         headers = fp.info()
 215         if filename:
 216             tfp = open(filename, 'wb')
 217         else:
 218             import tempfile
 219             garbage, path = splittype(url)
 220             garbage, path = splithost(path or "")
 221             path, garbage = splitquery(path or "")
 222             path, garbage = splitattr(path or "")
 223             suffix = os.path.splitext(path)[1]
 224             (fd, filename) = tempfile.mkstemp(suffix)
 225             self.__tempfiles.append(filename)
 226             tfp = os.fdopen(fd, 'wb')
 227         result = filename, headers
 228         if self.tempcache is not None:
 229             self.tempcache[url] = result
 230         bs = 1024*8
 231         size = -1
 232         blocknum = 1
 233         if reporthook:
 234             if "content-length" in headers:
 235                 size = int(headers["Content-Length"])
 236             reporthook(0, bs, size)
 237         block = fp.read(bs)
 238         if reporthook:
 239             reporthook(1, bs, size)
 240         while block:
 241             tfp.write(block)
 242             block = fp.read(bs)
 243             blocknum = blocknum + 1
 244             if reporthook:
 245                 reporthook(blocknum, bs, size)
 246         fp.close()
 247         tfp.close()
 248         del fp
 249         del tfp
 250         return result
 251
 252     # Each method named open_<type> knows how to open that type of URL
 253
 254     def open_http(self, url, data=None):
 255         """Use HTTP protocol."""
 256         import httplib
 257         user_passwd = None
 258         if isinstance(url, str):
 259             host, selector = splithost(url)
 260             if host:
 261                 user_passwd, host = splituser(host)
 262                 host = unquote(host)
 263             realhost = host
 264         else:
 265             host, selector = url
 266             urltype, rest = splittype(selector)
 267             url = rest
 268             user_passwd = None
 269             if urltype.lower() != 'http':
 270                 realhost = None
 271             else:
 272                 realhost, rest = splithost(rest)
 273                 if realhost:
 274                     user_passwd, realhost = splituser(realhost)
 275                 if user_passwd:
 276                     selector = "%s://%s%s" % (urltype, realhost, rest)
 277                 if proxy_bypass(realhost):
 278                     host = realhost
 279
 280             #print "proxy via http:", host, selector
 281         if not host: raise IOError, ('http error', 'no host given')
 282         if user_passwd:
 283             import base64
 284             auth = base64.encodestring(user_passwd).strip()
 285         else:
 286             auth = None
 287         h = httplib.HTTP(host)
 288         if data is not None:
 289             h.putrequest('POST', selector)
 290             h.putheader('Content-type', 'application/x-www-form-urlencoded')
 291             h.putheader('Content-length', '%d' % len(data))
 292         else:
 293             h.putrequest('GET', selector)
 294         if auth: h.putheader('Authorization', 'Basic %s' % auth)
 295         if realhost: h.putheader('Host', realhost)
 296         for args in self.addheaders: h.putheader(*args)
 297         h.endheaders()
 298         if data is not None:
 299             h.send(data)
 300         errcode, errmsg, headers = h.getreply()
 301         fp = h.getfile()
 302         if errcode == 200:
 303             return addinfourl(fp, headers, "http:" + url)
 304         else:
 305             if data is None:
 306                 return self.http_error(url, fp, errcode, errmsg, headers)
 307             else:
 308                 return self.http_error(url, fp, errcode, errmsg, headers, data)
 309
 310     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
 311         """Handle http errors.
 312         Derived class can override this, or provide specific handlers
 313         named http_error_DDD where DDD is the 3-digit error code."""
 314         # First check if there's a specific handler for this error
 315         name = 'http_error_%d' % errcode
 316         if hasattr(self, name):
 317             method = getattr(self, name)
 318             if data is None:
 319                 result = method(url, fp, errcode, errmsg, headers)
 320             else:
 321                 result = method(url, fp, errcode, errmsg, headers, data)
 322             if result: return result
 323         return self.http_error_default(url, fp, errcode, errmsg, headers)
 324
 325     def http_error_default(self, url, fp, errcode, errmsg, headers):
 326         """Default error handler: close the connection and raise IOError."""
 327         void = fp.read()
 328         fp.close()
 329         raise IOError, ('http error', errcode, errmsg, headers)
 330
 331     if hasattr(socket, "ssl"):
 332         def open_https(self, url, data=None):
 333             """Use HTTPS protocol."""
 334             import httplib
 335             user_passwd = None
 336             if isinstance(url, str):
 337                 host, selector = splithost(url)
 338                 if host:
 339                     user_passwd, host = splituser(host)
 340                     host = unquote(host)
 341                 realhost = host
 342             else:
 343                 host, selector = url
 344                 urltype, rest = splittype(selector)
 345                 url = rest
 346                 user_passwd = None
 347                 if urltype.lower() != 'https':
 348                     realhost = None
 349                 else:
 350                     realhost, rest = splithost(rest)
 351                     if realhost:
 352                         user_passwd, realhost = splituser(realhost)
 353                     if user_passwd:
 354                         selector = "%s://%s%s" % (urltype, realhost, rest)
 355                 #print "proxy via https:", host, selector
 356             if not host: raise IOError, ('https error', 'no host given')
 357             if user_passwd:
 358                 import base64
 359                 auth = base64.encodestring(user_passwd).strip()
 360             else:
 361                 auth = None
 362             h = httplib.HTTPS(host, 0,
 363                               key_file=self.key_file,
 364                               cert_file=self.cert_file)
 365             if data is not None:
 366                 h.putrequest('POST', selector)
 367                 h.putheader('Content-type',
 368                             'application/x-www-form-urlencoded')
 369                 h.putheader('Content-length', '%d' % len(data))
 370             else:
 371                 h.putrequest('GET', selector)
 372             if auth: h.putheader('Authorization: Basic %s' % auth)
 373             if realhost: h.putheader('Host', realhost)
 374             for args in self.addheaders: h.putheader(*args)
 375             h.endheaders()
 376             if data is not None:
 377                 h.send(data)
 378             errcode, errmsg, headers = h.getreply()
 379             fp = h.getfile()
 380             if errcode == 200:
 381                 return addinfourl(fp, headers, "https:" + url)
 382             else:
 383                 if data is None:
 384                     return self.http_error(url, fp, errcode, errmsg, headers)
 385                 else:
 386                     return self.http_error(url, fp, errcode, errmsg, headers,
 387                                            data)
 388
 389     def open_gopher(self, url):
 390         """Use Gopher protocol."""
 391         import gopherlib
 392         host, selector = splithost(url)
 393         if not host: raise IOError, ('gopher error', 'no host given')
 394         host = unquote(host)
 395         type, selector = splitgophertype(selector)
 396         selector, query = splitquery(selector)
 397         selector = unquote(selector)
 398         if query:
 399             query = unquote(query)
 400             fp = gopherlib.send_query(selector, query, host)
 401         else:
 402             fp = gopherlib.send_selector(selector, host)
 403         return addinfourl(fp, noheaders(), "gopher:" + url)
 404
 405     def open_file(self, url):
 406         """Use local file or FTP depending on form of URL."""
 407         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
 408             return self.open_ftp(url)
 409         else:
 410             return self.open_local_file(url)
 411
 412     def open_local_file(self, url):
 413         """Use local file."""
 414         import mimetypes, mimetools, rfc822, StringIO
 415         host, file = splithost(url)
 416         localname = url2pathname(file)
 417         try:
 418             stats = os.stat(localname)
 419         except OSError, e:
 420             raise IOError(e.errno, e.strerror, e.filename)
 421         size = stats.st_size
 422         modified = rfc822.formatdate(stats.st_mtime)
 423         mtype = mimetypes.guess_type(url)[0]
 424         headers = mimetools.Message(StringIO.StringIO(
 425             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
 426             (mtype or 'text/plain', size, modified)))
 427         if not host:
 428             urlfile = file
 429             if file[:1] == '/':
 430                 urlfile = 'file://' + file
 431             return addinfourl(open(localname, 'rb'),
 432                               headers, urlfile)
 433         host, port = splitport(host)
 434         if not port \
 435            and socket.gethostbyname(host) in (localhost(), thishost()):
 436             urlfile = file
 437             if file[:1] == '/':
 438                 urlfile = 'file://' + file
 439             return addinfourl(open(localname, 'rb'),
 440                               headers, urlfile)
 441         raise IOError, ('local file error', 'not on local host')
 442
 443     def open_ftp(self, url):
 444         """Use FTP protocol."""
 445         import mimetypes, mimetools, StringIO
 446         host, path = splithost(url)
 447         if not host: raise IOError, ('ftp error', 'no host given')
 448         host, port = splitport(host)
 449         user, host = splituser(host)
 450         if user: user, passwd = splitpasswd(user)
 451         else: passwd = None
 452         host = unquote(host)
 453         user = unquote(user or '')
 454         passwd = unquote(passwd or '')
 455         host = socket.gethostbyname(host)
 456         if not port:
 457             import ftplib
 458             port = ftplib.FTP_PORT
 459         else:
 460             port = int(port)
 461         path, attrs = splitattr(path)
 462         path = unquote(path)
 463         dirs = path.split('/')
 464         dirs, file = dirs[:-1], dirs[-1]
 465         if dirs and not dirs[0]: dirs = dirs[1:]
 466         if dirs and not dirs[0]: dirs[0] = '/'
 467         key = user, host, port, '/'.join(dirs)
 468         # XXX thread unsafe!
 469         if len(self.ftpcache) > MAXFTPCACHE:
 470             # Prune the cache, rather arbitrarily
 471             for k in self.ftpcache.keys():
 472                 if k != key:
 473                     v = self.ftpcache[k]
 474                     del self.ftpcache[k]
 475                     v.close()
 476         try:
 477             if not key in self.ftpcache:
 478                 self.ftpcache[key] = \
 479                     ftpwrapper(user, passwd, host, port, dirs)
 480             if not file: type = 'D'
 481             else: type = 'I'
 482             for attr in attrs:
 483                 attr, value = splitvalue(attr)
 484                 if attr.lower() == 'type' and \
 485                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
 486                     type = value.upper()
 487             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
 488             mtype = mimetypes.guess_type("ftp:" + url)[0]
 489             headers = ""
 490             if mtype:
 491                 headers += "Content-Type: %s\n" % mtype
 492             if retrlen is not None and retrlen >= 0:
 493                 headers += "Content-Length: %d\n" % retrlen
 494             headers = mimetools.Message(StringIO.StringIO(headers))
 495             return addinfourl(fp, headers, "ftp:" + url)
 496         except ftperrors(), msg:
 497             raise IOError, ('ftp error', msg), sys.exc_info()[2]
 498
 499     def open_data(self, url, data=None):
 500         """Use "data" URL."""
 501         # ignore POSTed data
 502         #
 503         # syntax of data URLs:
 504         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 505         # mediatype := [ type "/" subtype ] *( ";" parameter )
 506         # data      := *urlchar
 507         # parameter := attribute "=" value
 508         import StringIO, mimetools
 509         try:
 510             [type, data] = url.split(',', 1)
 511         except ValueError:
 512             raise IOError, ('data error', 'bad data URL')
 513         if not type:
 514             type = 'text/plain;charset=US-ASCII'
 515         semi = type.rfind(';')
 516         if semi >= 0 and '=' not in type[semi:]:
 517             encoding = type[semi+1:]
 518             type = type[:semi]
 519         else:
 520             encoding = ''
 521         msg = []
 522         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
 523                                             time.gmtime(time.time())))
 524         msg.append('Content-type: %s' % type)
 525         if encoding == 'base64':
 526             import base64
 527             data = base64.decodestring(data)
 528         else:
 529             data = unquote(data)
 530         msg.append('Content-length: %d' % len(data))
 531         msg.append('')
 532         msg.append(data)
 533         msg = '\n'.join(msg)
 534         f = StringIO.StringIO(msg)
 535         headers = mimetools.Message(f, 0)
 536         f.fileno = None     # needed for addinfourl
 537         return addinfourl(f, headers, url)
 538
 539
 540 class FancyURLopener(URLopener):
 541     """Derived class with handlers for errors we can handle (perhaps)."""
 542
 543     def __init__(self, *args, **kwargs):
 544         URLopener.__init__(self, *args, **kwargs)
 545         self.auth_cache = {}
 546         self.tries = 0
 547         self.maxtries = 10
 548
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception.

        The error reply is returned wrapped in addinfourl so the caller
        can read it; the reported URL always gets the "http:" prefix."""
        return addinfourl(fp, headers, "http:" + url)
 552
 553     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
 554         """Error 302 -- relocated (temporarily)."""
 555         self.tries += 1
 556         if self.maxtries and self.tries >= self.maxtries:
 557             if hasattr(self, "http_error_500"):
 558                 meth = self.http_error_500
 559             else:
 560                 meth = self.http_error_default
 561             self.tries = 0
 562             return meth(url, fp, 500,
 563                         "Internal Server Error: Redirect Recursion", headers)
 564         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
 565                                         data)
 566         self.tries = 0
 567         return result
 568
 569     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
 570         if 'location' in headers:
 571             newurl = headers['location']
 572         elif 'uri' in headers:
 573             newurl = headers['uri']
 574         else:
 575             return
 576         void = fp.read()
 577         fp.close()
 578         # In case the server sent a relative URL, join with original:
 579         newurl = basejoin(self.type + ":" + url, newurl)
 580         if data is None:
 581             return self.open(newurl)
 582         else:
 583             return self.open(newurl, data)
 584
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently).

        Handled exactly like a 302: delegates to http_error_302()."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 588
    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302).

        Delegates to http_error_302() with all arguments unchanged."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 592
 593     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
 594         """Error 401 -- authentication required.
 595         See this URL for a description of the basic authentication scheme:
 596         http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
 597         if not 'www-authenticate' in headers:
 598             URLopener.http_error_default(self, url, fp,
 599                                          errcode, errmsg, headers)
 600         stuff = headers['www-authenticate']
 601         import re
 602         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 603         if not match:
 604             URLopener.http_error_default(self, url, fp,
 605                                          errcode, errmsg, headers)
 606         scheme, realm = match.groups()
 607         if scheme.lower() != 'basic':
 608             URLopener.http_error_default(self, url, fp,
 609                                          errcode, errmsg, headers)
 610         name = 'retry_' + self.type + '_basic_auth'
 611         if data is None:
 612             return getattr(self,name)(url, realm)
 613         else:
 614             return getattr(self,name)(url, realm, data)
 615
 616     def retry_http_basic_auth(self, url, realm, data=None):
 617         host, selector = splithost(url)
 618         i = host.find('@') + 1
 619         host = host[i:]
 620         user, passwd = self.get_user_passwd(host, realm, i)
 621         if not (user or passwd): return None
 622         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 623         newurl = 'http://' + host + selector
 624         if data is None:
 625             return self.open(newurl)
 626         else:
 627             return self.open(newurl, data)
 628
 629     def retry_https_basic_auth(self, url, realm, data=None):
 630         host, selector = splithost(url)
 631         i = host.find('@') + 1
 632         host = host[i:]
 633         user, passwd = self.get_user_passwd(host, realm, i)
 634         if not (user or passwd): return None
 635         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 636         newurl = '//' + host + selector
 637         return self.open_https(newurl, data)
 638
 639     def get_user_passwd(self, host, realm, clear_cache = 0):
 640         key = realm + '@' + host.lower()
 641         if key in self.auth_cache:
 642             if clear_cache:
 643                 del self.auth_cache[key]
 644             else:
 645                 return self.auth_cache[key]
 646         user, passwd = self.prompt_user_passwd(host, realm)
 647         if user or passwd: self.auth_cache[key] = (user, passwd)
 648         return user, passwd
 649
    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Asks for a user name with raw_input() and a password with
        getpass.getpass() and returns (user, passwd).  Returns
        (None, None) when interrupted by KeyboardInterrupt."""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Finish the interrupted prompt line.
            print
            return None, None
 662
 663
 664 # Utility functions
 665
 666 _localhost = None
 667 def localhost():
 668     """Return the IP address of the magic hostname 'localhost'."""
 669     global _localhost
 670     if _localhost is None:
 671         _localhost = socket.gethostbyname('localhost')
 672     return _localhost
 673
 674 _thishost = None
 675 def thishost():
 676     """Return the IP address of the current host."""
 677     global _thishost
 678     if _thishost is None:
 679         _thishost = socket.gethostbyname(socket.gethostname())
 680     return _thishost
 681
 682 _ftperrors = None
 683 def ftperrors():
 684     """Return the set of errors raised by the FTP class."""
 685     global _ftperrors
 686     if _ftperrors is None:
 687         import ftplib
 688         _ftperrors = ftplib.all_errors
 689     return _ftperrors
 690
 691 _noheaders = None
 692 def noheaders():
 693     """Return an empty mimetools.Message object."""
 694     global _noheaders
 695     if _noheaders is None:
 696         import mimetools
 697         import StringIO
 698         _noheaders = mimetools.Message(StringIO.StringIO(), 0)
 699         _noheaders.fp.close()   # Recycle file descriptor
 700     return _noheaders
 701
 702
 703 # Utility classes
 704
 705 class ftpwrapper:
 706     """Class used by open_ftp() for cache of open FTP connections."""
 707
 708     def __init__(self, user, passwd, host, port, dirs):
 709         self.user = user
 710         self.passwd = passwd
 711         self.host = host
 712         self.port = port
 713         self.dirs = dirs
 714         self.init()
 715
 716     def init(self):
 717         import ftplib
 718         self.busy = 0
 719         self.ftp = ftplib.FTP()
 720         self.ftp.connect(self.host, self.port)
 721         self.ftp.login(self.user, self.passwd)
 722         for dir in self.dirs:
 723             self.ftp.cwd(dir)
 724
    def retrfile(self, file, type):
        """Start a transfer of *file* and return (fileobj, length).

        *type* is the FTP transfer type letter; 'd' or 'D' asks for a
        directory listing.  When *file* is empty, or retrieving it fails
        with a 550 reply, a LIST is issued instead of a RETR.  Other
        permission errors are re-raised as IOError('ftp error', reason).
        The returned file object runs endtransfer() when it is closed.
        """
        import ftplib
        # Finish any transfer still pending on this connection.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed: reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A 550 reply is not fatal here: conn stays None and the
                # listing below is tried instead.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        # Mark the connection busy until endtransfer() runs.
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
 762     def endtransfer(self):
 763         if not self.busy:
 764             return
 765         self.busy = 0
 766         try:
 767             self.ftp.voidresp()
 768         except ftperrors():
 769             pass
 770
 771     def close(self):
 772         self.endtransfer()
 773         try:
 774             self.ftp.close()
 775         except ftperrors():
 776             pass
 777
 778 class addbase:
 779     """Base class for addinfo and addclosehook."""
 780
 781     def __init__(self, fp):
 782         self.fp = fp
 783         self.read = self.fp.read
 784         self.readline = self.fp.readline
 785         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
 786         if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno
 787         if hasattr(self.fp, "__iter__"):
 788             self.__iter__ = self.fp.__iter__
 789             if hasattr(self.fp, "next"):
 790                 self.next = self.fp.next
 791
 792     def __repr__(self):
 793         return '<%s at %s whose fp = %s>' % (self.__class__.__name__,
 794                                              `id(self)`, `self.fp`)
 795
 796     def close(self):
 797         self.read = None
 798         self.readline = None
 799         self.readlines = None
 800         self.fileno = None
 801         if self.fp: self.fp.close()
 802         self.fp = None
 803
 804 class addclosehook(addbase):
 805     """Class to add a close hook to an open file."""
 806
 807     def __init__(self, fp, closehook, *hookargs):
 808         addbase.__init__(self, fp)
 809         self.closehook = closehook
 810         self.hookargs = hookargs
 811
 812     def close(self):
 813         addbase.close(self)
 814         if self.closehook:
 815             self.closehook(*self.hookargs)
 816             self.closehook = None
 817             self.hookargs = None
 818
 819 class addinfo(addbase):
 820     """class to add an info() method to an open file."""
 821
 822     def __init__(self, fp, headers):
 823         addbase.__init__(self, fp)
 824         self.headers = headers
 825
 826     def info(self):
 827         return self.headers
 828
 829 class addinfourl(addbase):
 830     """class to add info() and geturl() methods to an open file."""
 831
 832     def __init__(self, fp, headers, url):
 833         addbase.__init__(self, fp)
 834         self.headers = headers
 835         self.url = url
 836
 837     def info(self):
 838         return self.headers
 839
 840     def geturl(self):
 841         return self.url
 842
 843
 844 def basejoin(base, url):
 845     """Utility to combine a URL with a base URL to form a new URL."""
 846     type, path = splittype(url)
 847     if type:
 848         # if url is complete (i.e., it contains a type), return it
 849         return url
 850     host, path = splithost(path)
 851     type, basepath = splittype(base) # inherit type from base
 852     if host:
 853         # if url contains host, just inherit type
 854         if type: return type + '://' + host + path
 855         else:
 856             # no type inherited, so url must have started with //
 857             # just return it
 858             return url
 859     host, basepath = splithost(basepath) # inherit host
 860     basepath, basetag = splittag(basepath) # remove extraneous cruft
 861     basepath, basequery = splitquery(basepath) # idem
 862     if path[:1] != '/':
 863         # non-absolute path name
 864         if path[:1] in ('#', '?'):
 865             # path is just a tag or query, attach to basepath
 866             i = len(basepath)
 867         else:
 868             # else replace last component
 869             i = basepath.rfind('/')
 870         if i < 0:
 871             # basepath not absolute
 872             if host:
 873                 # host present, make absolute
 874                 basepath = '/'
 875             else:
 876                 # else keep non-absolute
 877                 basepath = ''
 878         else:
 879             # remove last file component
 880             basepath = basepath[:i+1]
 881         # Interpret ../ (important because of symlinks)
 882         while basepath and path[:3] == '../':
 883             path = path[3:]
 884             i = basepath[:-1].rfind('/')
 885             if i > 0:
 886                 basepath = basepath[:i+1]
 887             elif i == 0:
 888                 basepath = '/'
 889                 break
 890             else:
 891                 basepath = ''
 892
 893         path = basepath + path
 894     if host and path and path[0] != '/':
 895         path = '/' + path
 896     if type and host: return type + '://' + host + path
 897     elif type: return type + ':' + path
 898     elif host: return '//' + host + path # don't know what this means
 899     else: return path
 900
 901
 902 # Utilities to parse URLs (most of these return None for missing parts):
 903 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 904 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 905 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 906 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
 907 # splitpasswd('user:passwd') -> 'user', 'passwd'
 908 # splitport('host:port') --> 'host', 'port'
 909 # splitquery('/path?query') --> '/path', 'query'
 910 # splittag('/path#tag') --> '/path', 'tag'
 911 # splitattr('/path;attr1=value1;attr2=value2;...') ->
 912 #   '/path', ['attr1=value1', 'attr2=value2', ...]
 913 # splitvalue('attr=value') --> 'attr', 'value'
 914 # splitgophertype('/Xselector') --> 'X', 'selector'
 915 # unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
 917
 918 try:
 919     unicode
 920 except NameError:
 921     def _is_unicode(x):
 922         return 0
 923 else:
 924     def _is_unicode(x):
 925         return isinstance(x, unicode)
 926
 927 def toBytes(url):
 928     """toBytes(u"URL") --> 'URL'."""
 929     # Most URL schemes require ASCII. If that changes, the conversion
 930     # can be relaxed
 931     if _is_unicode(url):
 932         try:
 933             url = url.encode("ASCII")
 934         except UnicodeError:
 935             raise UnicodeError("URL " + repr(url) +
 936                                " contains non-ASCII characters")
 937     return url
 938
 939 def unwrap(url):
 940     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
 941     url = url.strip()
 942     if url[:1] == '<' and url[-1:] == '>':
 943         url = url[1:-1].strip()
 944     if url[:4] == 'URL:': url = url[4:].strip()
 945     return url
 946
 947 _typeprog = None
 948 def splittype(url):
 949     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
 950     global _typeprog
 951     if _typeprog is None:
 952         import re
 953         _typeprog = re.compile('^([^/:]+):')
 954
 955     match = _typeprog.match(url)
 956     if match:
 957         scheme = match.group(1)
 958         return scheme.lower(), url[len(scheme) + 1:]
 959     return None, url
 960
 961 _hostprog = None
 962 def splithost(url):
 963     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
 964     global _hostprog
 965     if _hostprog is None:
 966         import re
 967         _hostprog = re.compile('^//([^/]*)(.*)$')
 968
 969     match = _hostprog.match(url)
 970     if match: return match.group(1, 2)
 971     return None, url
 972
 973 _userprog = None
 974 def splituser(host):
 975     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
 976     global _userprog
 977     if _userprog is None:
 978         import re
 979         _userprog = re.compile('^(.*)@(.*)$')
 980
 981     match = _userprog.match(host)
 982     if match: return map(unquote, match.group(1, 2))
 983     return None, host
 984
 985 _passwdprog = None
 986 def splitpasswd(user):
 987     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
 988     global _passwdprog
 989     if _passwdprog is None:
 990         import re
 991         _passwdprog = re.compile('^([^:]*):(.*)$')
 992
 993     match = _passwdprog.match(user)
 994     if match: return match.group(1, 2)
 995     return user, None
 996
# splitport('host:port') --> 'host', 'port'
 998 _portprog = None
 999 def splitport(host):
1000     """splitport('host:port') --> 'host', 'port'."""
1001     global _portprog
1002     if _portprog is None:
1003         import re
1004         _portprog = re.compile('^(.*):([0-9]+)$')
1005
1006     match = _portprog.match(host)
1007     if match: return match.group(1, 2)
1008     return host, None
1009
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port if no ':' is found; it defaults to -1.
    Return the numerical port if a valid number is found after ':'.
    Return None as port if there is a ':' but no valid number after it.
    """
    global _nportprog
    if _nportprog is None:
        # compiled on first use and cached in the module global
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.groups()
    nport = None
    if port:
        # an empty port stays None, just like an unparsable one
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
1031
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; without one the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # compiled on first use and cached in the module global
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.groups()
1043
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; without one the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # compiled on first use and cached in the module global
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
1055
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
1061
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; without one the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # compiled on first use and cached in the module global
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
1073
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    The result is (None, selector) unless selector starts with '/'
    and has at least one more character after it.
    """
    if selector.startswith('/') and len(selector) > 1:
        gtype = selector[1]
        rest = selector[2:]
        return gtype, rest
    return None, selector
1079
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by two hex digits is replaced by the character
    with that code.  A '%' that is not followed by a valid two digit
    hex number is left in place.
    """
    chunks = s.split('%')
    # Everything before the first '%' is copied verbatim
    decoded = [chunks[0]]
    for chunk in chunks[1:]:
        if not chunk[1:2]:
            # fewer than two characters after the '%'
            decoded.append('%' + chunk)
            continue
        try:
            decoded.append(chr(int(chunk[:2], 16)) + chunk[2:])
        except ValueError:
            decoded.append('%' + chunk)
    return "".join(decoded)
1098
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is turned into a space first.
    """
    # replace '+' with ' ' before the %xx escapes are decoded
    return unquote(s.replace('+', ' '))
1105
1106 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1107                'abcdefghijklmnopqrstuvwxyz'
1108                '0123456789' '_.-')
1109
1110 _fast_safe_test = always_safe + '/'
1111 _fast_safe = None
1112
1113 def _fast_quote(s):
1114     global _fast_safe
1115     if _fast_safe is None:
1116         _fast_safe = {}
1117         for c in _fast_safe_test:
1118             _fast_safe[c] = c
1119     res = list(s)
1120     for i in range(len(res)):
1121         c = res[i]
1122         if not c in _fast_safe:
1123             res[i] = '%%%02X' % ord(c)
1124     return ''.join(res)
1125
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    The safe argument names additional characters that are left
    unquoted; letters, digits and '_.-' are never quoted.
    """
    allowed = always_safe + safe
    if allowed == _fast_safe_test:
        # default safe set: use the table driven implementation
        return _fast_quote(s)
    quoted = []
    for ch in s:
        if ch in allowed:
            quoted.append(ch)
        else:
            quoted.append('%%%02X' % ord(ch))
    return ''.join(quoted)
1156
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Everything between the spaces is quoted with quote(), using the
    given safe characters.
    """
    if ' ' not in s:
        return quote(s, safe)
    # quote each space-separated word on its own, then rejoin with '+'
    return '+'.join([quote(word, safe) for word in s.split(' ')])
1166
1167 def urlencode(query,doseq=0):
1168     """Encode a sequence of two-element tuples or dictionary into a URL query string.
1169
1170     If any values in the query arg are sequences and doseq is true, each
1171     sequence element is converted to a separate parameter.
1172
1173     If the query arg is a sequence of two-element tuples, the order of the
1174     parameters in the output will match the order of parameters in the
1175     input.
1176     """
1177
1178     if hasattr(query,"items"):
1179         # mapping objects
1180         query = query.items()
1181     else:
1182         # it's a bother at times that strings and string-like objects are
1183         # sequences...
1184         try:
1185             # non-sequence items should not work with len()
1186             # non-empty strings will fail this
1187             if len(query) and not isinstance(query[0], tuple):
1188                 raise TypeError
1189             # zero-length sequences of all types will get here and succeed,
1190             # but that's a minor nit - since the original implementation
1191             # allowed empty dicts that type of behavior probably should be
1192             # preserved for consistency
1193         except TypeError:
1194             ty,va,tb = sys.exc_info()
1195             raise TypeError, "not a valid non-string sequence or mapping object", tb
1196
1197     l = []
1198     if not doseq:
1199         # preserve old behavior
1200         for k, v in query:
1201             k = quote_plus(str(k))
1202             v = quote_plus(str(v))
1203             l.append(k + '=' + v)
1204     else:
1205         for k, v in query:
1206             k = quote_plus(str(k))
1207             if isinstance(v, str):
1208                 v = quote_plus(v)
1209                 l.append(k + '=' + v)
1210             elif _is_unicode(v):
1211                 # is there a reasonable way to convert to ASCII?
1212                 # encode generates a string, but "replace" or "ignore"
1213                 # lose information and "strict" can raise UnicodeError
1214                 v = quote_plus(v.encode("ASCII","replace"))
1215                 l.append(k + '=' + v)
1216             else:
1217                 try:
1218                     # is this a sufficient test for sequence-ness?
1219                     x = len(v)
1220                 except TypeError:
1221                     # not a sequence
1222                     v = quote_plus(str(v))
1223                     l.append(k + '=' + v)
1224                 else:
1225                     # loop over the sequence
1226                     for elt in v:
1227                         l.append(k + '=' + quote_plus(str(elt)))
1228     return '&'.join(l)
1229
1230 # Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lower case and variables with an
    empty value are skipped.

    """
    suffix = '_proxy'
    found = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            found[name[:-len(suffix)]] = value
    return found
1246
1247 if os.name == 'mac':
1248     def getproxies():
1249         """Return a dictionary of scheme -> proxy server URL mappings.
1250
1251         By convention the mac uses Internet Config to store
1252         proxies.  An HTTP proxy, for instance, is stored under
1253         the HttpProxy key.
1254
1255         """
1256         try:
1257             import ic
1258         except ImportError:
1259             return {}
1260
1261         try:
1262             config = ic.IC()
1263         except ic.error:
1264             return {}
1265         proxies = {}
1266         # HTTP:
1267         if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1268             try:
1269                 value = config['HTTPProxyHost']
1270             except ic.error:
1271                 pass
1272             else:
1273                 proxies['http'] = 'http://%s' % value
1274         # FTP: XXXX To be done.
1275         # Gopher: XXXX To be done.
1276         return proxies
1277
1278     def proxy_bypass(x):
1279         return 0
1280
1281 elif os.name == 'nt':
1282     def getproxies_registry():
1283         """Return a dictionary of scheme -> proxy server URL mappings.
1284
1285         Win32 uses the registry to store proxies.
1286
1287         """
1288         proxies = {}
1289         try:
1290             import _winreg
1291         except ImportError:
1292             # Std module, so should be around - but you never know!
1293             return proxies
1294         try:
1295             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1296                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1297             proxyEnable = _winreg.QueryValueEx(internetSettings,
1298                                                'ProxyEnable')[0]
1299             if proxyEnable:
1300                 # Returned as Unicode but problems if not converted to ASCII
1301                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1302                                                        'ProxyServer')[0])
1303                 if '=' in proxyServer:
1304                     # Per-protocol settings
1305                     for p in proxyServer.split(';'):
1306                         protocol, address = p.split('=', 1)
1307                         # See if address has a type:// prefix
1308                         import re
1309                         if not re.match('^([^/:]+)://', address):
1310                             address = '%s://%s' % (protocol, address)
1311                         proxies[protocol] = address
1312                 else:
1313                     # Use one setting for all protocols
1314                     if proxyServer[:5] == 'http:':
1315                         proxies['http'] = proxyServer
1316                     else:
1317                         proxies['http'] = 'http://%s' % proxyServer
1318                         proxies['ftp'] = 'ftp://%s' % proxyServer
1319             internetSettings.Close()
1320         except (WindowsError, ValueError, TypeError):
1321             # Either registry key not found etc, or the value in an
1322             # unexpected format.
1323             # proxies already set up to be empty so nothing to do
1324             pass
1325         return proxies
1326
1327     def getproxies():
1328         """Return a dictionary of scheme -> proxy server URL mappings.
1329
1330         Returns settings gathered from the environment, if specified,
1331         or the registry.
1332
1333         """
1334         return getproxies_environment() or getproxies_registry()
1335
1336     def proxy_bypass(host):
1337         try:
1338             import _winreg
1339             import re
1340         except ImportError:
1341             # Std modules, so should be around - but you never know!
1342             return 0
1343         try:
1344             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1345                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1346             proxyEnable = _winreg.QueryValueEx(internetSettings,
1347                                                'ProxyEnable')[0]
1348             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1349                                                      'ProxyOverride')[0])
1350             # ^^^^ Returned as Unicode but problems if not converted to ASCII
1351         except WindowsError:
1352             return 0
1353         if not proxyEnable or not proxyOverride:
1354             return 0
1355         # try to make a host list from name and IP address.
1356         host = [host]
1357         try:
1358             addr = socket.gethostbyname(host[0])
1359             if addr != host:
1360                 host.append(addr)
1361         except socket.error:
1362             pass
1363         # make a check value list from the registry entry: replace the
1364         # '<local>' string by the localhost entry and the corresponding
1365         # canonical entry.
1366         proxyOverride = proxyOverride.split(';')
1367         i = 0
1368         while i < len(proxyOverride):
1369             if proxyOverride[i] == '<local>':
1370                 proxyOverride[i:i+1] = ['localhost',
1371                                         '127.0.0.1',
1372                                         socket.gethostname(),
1373                                         socket.gethostbyname(
1374                                             socket.gethostname())]
1375             i += 1
1376         # print proxyOverride
1377         # now check if we match one of the registry values.
1378         for test in proxyOverride:
1379             test = test.replace(".", r"\.")     # mask dots
1380             test = test.replace("*", r".*")     # change glob sequence
1381             test = test.replace("?", r".")      # change glob char
1382             for val in host:
1383                 # print "%s <--> %s" %( test, val )
1384                 if re.match(test, val, re.I):
1385                     return 1
1386         return 0
1387
1388 else:
1389     # By default use environment variables
1390     getproxies = getproxies_environment
1391
1392     def proxy_bypass(host):
1393         return 0
1394
1395 # Test and time quote() and unquote()
def test1():
    """Quote and unquote every 8-bit character and time the round trip.

    Prints 'Wrong!' when the round trip does not give back the input,
    then the reprs of the input, the quoted and the unquoted string,
    and finally the elapsed time in seconds.
    """
    # all 256 character codes, repeated four times
    sample = ''.join([chr(code) for code in range(256)]) * 4
    started = time.time()
    quoted = quote(sample)
    restored = unquote(quoted)
    finished = time.time()
    if restored != sample:
        print('Wrong!')
    print(repr(sample))
    print(repr(quoted))
    print(repr(restored))
    print('%s sec' % round(finished - started, 3))
1410
1411
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers: print the three numbers given."""
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
1416
1417 # Test program
1418 def test(args=[]):
1419     if not args:
1420         args = [
1421             '/etc/passwd',
1422             'file:/etc/passwd',
1423             'file://localhost/etc/passwd',
1424             'ftp://ftp.python.org/pub/python/README',
1425 ##          'gopher://gopher.micro.umn.edu/1/',
1426             'http://www.python.org/index.html',
1427             ]
1428         if hasattr(URLopener, "open_https"):
1429             args.append('https://synergy.as.cmu.edu/~geek/')
1430     try:
1431         for url in args:
1432             print '-'*10, url, '-'*10
1433             fn, h = urlretrieve(url, None, reporthook)
1434             print fn
1435             if h:
1436                 print '======'
1437                 for k in h.keys(): print k + ':', h[k]
1438                 print '======'
1439             fp = open(fn, 'rb')
1440             data = fp.read()
1441             del fp
1442             if '\r' in data:
1443                 table = string.maketrans("", "")
1444                 data = data.translate(table, "\r")
1445             print data
1446             fn, h = None, None
1447         print '-'*40
1448     finally:
1449         urlcleanup()
1450
1451 def main():
1452     import getopt, sys
1453     try:
1454         opts, args = getopt.getopt(sys.argv[1:], "th")
1455     except getopt.error, msg:
1456         print msg
1457         print "Use -h for help"
1458         return
1459     t = 0
1460     for o, a in opts:
1461         if o == '-t':
1462             t = t + 1
1463         if o == '-h':
1464             print "Usage: python urllib.py [-t] [url ...]"
1465             print "-t runs self-test;",
1466             print "otherwise, contents of urls are printed"
1467             return
1468     if t:
1469         if t > 1:
1470             test1()
1471         test(args)
1472     else:
1473         if not args:
1474             print "Use -h for help"
1475         for url in args:
1476             print urlopen(url).read(),
1477
1478 # Run test program when run as a script
1479 if __name__ == '__main__':
1480     main()