Lib/urllib.py

   1 """Open an arbitrary URL.
   2
   3 See the following document for more info on URLs:
   4 "Names and Addresses, URIs, URLs, URNs, URCs", at
   5 http://www.w3.org/pub/WWW/Addressing/Overview.html
   6
   7 See also the HTTP spec (from which the error codes are derived):
   8 "HTTP - Hypertext Transfer Protocol", at
   9 http://www.w3.org/pub/WWW/Protocols/
  10
  11 Related standards and specs:
  12 - RFC1808: the "relative URL" spec. (authoritative status)
  13 - RFC1738 - the "URL standard". (authoritative status)
  14 - RFC1630 - the "URI spec". (informational status)
  15
  16 The object returned by URLopener().open(file) will differ per
  17 protocol.  All you know is that it has methods read(), readline(),
  18 readlines(), fileno(), close() and info().  The read*(), fileno()
  19 and close() methods work like those of open files.
  20 The info() method returns a mimetools.Message object which can be
  21 used to query various info about the object, if available.
  22 (mimetools.Message objects are queried with the getheader() method.)
  23 """
  24
  25 import string
  26 import socket
  27 import os
  28 import time
  29 import sys
  30
# Public names of this module (what "from urllib import *" exports).
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]

# Interpolated into URLopener.version, which is sent as the User-agent header.
__version__ = '1.15'    # XXX This version is not always updated :-(

# URLopener.open_ftp() prunes its connection cache once it holds more
# entries than this.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
  42
# Helper for non-unix systems: 'mac', 'nt' and 'riscos' each import their
# own url2pathname()/pathname2url() pair from a platform module; every
# other os.name gets the generic fallbacks defined below.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Generic fallback: return unquote(pathname)."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Generic fallback: return quote(pathname)."""
        return quote(pathname)
  55
  56 # This really consists of two pieces:
  57 # (1) a class which handles opening of all sorts of URLs
  58 #     (plus assorted utilities etc.)
  59 # (2) a set of functions for parsing URLs
  60 # XXX Should these be separated out into different modules?
  61
  62
  63 # Shortcut for basic usage
  64 _urlopener = None
  65 def urlopen(url, data=None, proxies=None):
  66     """urlopen(url [, data]) -> open file-like object"""
  67     global _urlopener
  68     if proxies is not None:
  69         opener = FancyURLopener(proxies=proxies)
  70     elif not _urlopener:
  71         opener = FancyURLopener()
  72         _urlopener = opener
  73     else:
  74         opener = _urlopener
  75     if data is None:
  76         return opener.open(url)
  77     else:
  78         return opener.open(url, data)
  79 def urlretrieve(url, filename=None, reporthook=None, data=None):
  80     global _urlopener
  81     if not _urlopener:
  82         _urlopener = FancyURLopener()
  83     return _urlopener.retrieve(url, filename, reporthook, data)
  84 def urlcleanup():
  85     if _urlopener:
  86         _urlopener.cleanup()
  87
  88
  89 ftpcache = {}
  90 class URLopener:
  91     """Class to open URLs.
  92     This is a class rather than just a subroutine because we may need
  93     more than one set of global protocol-specific options.
  94     Note -- this is a base class for those who don't want the
  95     automatic handling of errors type 302 (relocated) and 401
  96     (authorization needed)."""
  97
  98     __tempfiles = None
  99
 100     version = "Python-urllib/%s" % __version__
 101
 102     # Constructor
 103     def __init__(self, proxies=None, **x509):
 104         if proxies is None:
 105             proxies = getproxies()
 106         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
 107         self.proxies = proxies
 108         self.key_file = x509.get('key_file')
 109         self.cert_file = x509.get('cert_file')
 110         self.addheaders = [('User-agent', self.version)]
 111         self.__tempfiles = []
 112         self.__unlink = os.unlink # See cleanup()
 113         self.tempcache = None
 114         # Undocumented feature: if you assign {} to tempcache,
 115         # it is used to cache files retrieved with
 116         # self.retrieve().  This is not enabled by default
 117         # since it does not work for changing documents (and I
 118         # haven't got the logic to check expiration headers
 119         # yet).
 120         self.ftpcache = ftpcache
 121         # Undocumented feature: you can use a different
 122         # ftp cache by assigning to the .ftpcache member;
 123         # in case you want logically independent URL openers
 124         # XXX This is not threadsafe.  Bah.
 125
 126     def __del__(self):
 127         self.close()
 128
 129     def close(self):
 130         self.cleanup()
 131
 132     def cleanup(self):
 133         # This code sometimes runs when the rest of this module
 134         # has already been deleted, so it can't use any globals
 135         # or import anything.
 136         if self.__tempfiles:
 137             for file in self.__tempfiles:
 138                 try:
 139                     self.__unlink(file)
 140                 except OSError:
 141                     pass
 142             del self.__tempfiles[:]
 143         if self.tempcache:
 144             self.tempcache.clear()
 145
 146     def addheader(self, *args):
 147         """Add a header to be used by the HTTP interface only
 148         e.g. u.addheader('Accept', 'sound/basic')"""
 149         self.addheaders.append(args)
 150
 151     # External interface
 152     def open(self, fullurl, data=None):
 153         """Use URLopener().open(file) instead of open(file, 'r')."""
 154         fullurl = unwrap(toBytes(fullurl))
 155         if self.tempcache and fullurl in self.tempcache:
 156             filename, headers = self.tempcache[fullurl]
 157             fp = open(filename, 'rb')
 158             return addinfourl(fp, headers, fullurl)
 159         urltype, url = splittype(fullurl)
 160         if not urltype:
 161             urltype = 'file'
 162         if urltype in self.proxies:
 163             proxy = self.proxies[urltype]
 164             urltype, proxyhost = splittype(proxy)
 165             host, selector = splithost(proxyhost)
 166             url = (host, fullurl) # Signal special case to open_*()
 167         else:
 168             proxy = None
 169         name = 'open_' + urltype
 170         self.type = urltype
 171         if '-' in name:
 172             # replace - with _
 173             name = '_'.join(name.split('-'))
 174         if not hasattr(self, name):
 175             if proxy:
 176                 return self.open_unknown_proxy(proxy, fullurl, data)
 177             else:
 178                 return self.open_unknown(fullurl, data)
 179         try:
 180             if data is None:
 181                 return getattr(self, name)(url)
 182             else:
 183                 return getattr(self, name)(url, data)
 184         except socket.error, msg:
 185             raise IOError, ('socket error', msg), sys.exc_info()[2]
 186
 187     def open_unknown(self, fullurl, data=None):
 188         """Overridable interface to open unknown URL type."""
 189         type, url = splittype(fullurl)
 190         raise IOError, ('url error', 'unknown url type', type)
 191
 192     def open_unknown_proxy(self, proxy, fullurl, data=None):
 193         """Overridable interface to open unknown URL type."""
 194         type, url = splittype(fullurl)
 195         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
 196
 197     # External interface
 198     def retrieve(self, url, filename=None, reporthook=None, data=None):
 199         """retrieve(url) returns (filename, None) for a local object
 200         or (tempfilename, headers) for a remote object."""
 201         url = unwrap(toBytes(url))
 202         if self.tempcache and url in self.tempcache:
 203             return self.tempcache[url]
 204         type, url1 = splittype(url)
 205         if filename is None and (not type or type == 'file'):
 206             try:
 207                 fp = self.open_local_file(url1)
 208                 hdrs = fp.info()
 209                 del fp
 210                 return url2pathname(splithost(url1)[1]), hdrs
 211             except IOError, msg:
 212                 pass
 213         fp = self.open(url, data)
 214         headers = fp.info()
 215         if filename:
 216             tfp = open(filename, 'wb')
 217         else:
 218             import tempfile
 219             garbage, path = splittype(url)
 220             garbage, path = splithost(path or "")
 221             path, garbage = splitquery(path or "")
 222             path, garbage = splitattr(path or "")
 223             suffix = os.path.splitext(path)[1]
 224             (fd, filename) = tempfile.mkstemp(suffix)
 225             self.__tempfiles.append(filename)
 226             tfp = os.fdopen(fd, 'wb')
 227         result = filename, headers
 228         if self.tempcache is not None:
 229             self.tempcache[url] = result
 230         bs = 1024*8
 231         size = -1
 232         blocknum = 1
 233         if reporthook:
 234             if "content-length" in headers:
 235                 size = int(headers["Content-Length"])
 236             reporthook(0, bs, size)
 237         block = fp.read(bs)
 238         if reporthook:
 239             reporthook(1, bs, size)
 240         while block:
 241             tfp.write(block)
 242             block = fp.read(bs)
 243             blocknum = blocknum + 1
 244             if reporthook:
 245                 reporthook(blocknum, bs, size)
 246         fp.close()
 247         tfp.close()
 248         del fp
 249         del tfp
 250         return result
 251
 252     # Each method named open_<type> knows how to open that type of URL
 253
 254     def open_http(self, url, data=None):
 255         """Use HTTP protocol."""
 256         import httplib
 257         user_passwd = None
 258         if isinstance(url, str):
 259             host, selector = splithost(url)
 260             if host:
 261                 user_passwd, host = splituser(host)
 262                 host = unquote(host)
 263             realhost = host
 264         else:
 265             host, selector = url
 266             urltype, rest = splittype(selector)
 267             url = rest
 268             user_passwd = None
 269             if urltype.lower() != 'http':
 270                 realhost = None
 271             else:
 272                 realhost, rest = splithost(rest)
 273                 if realhost:
 274                     user_passwd, realhost = splituser(realhost)
 275                 if user_passwd:
 276                     selector = "%s://%s%s" % (urltype, realhost, rest)
 277                 if proxy_bypass(realhost):
 278                     host = realhost
 279
 280             #print "proxy via http:", host, selector
 281         if not host: raise IOError, ('http error', 'no host given')
 282         if user_passwd:
 283             import base64
 284             auth = base64.encodestring(user_passwd).strip()
 285         else:
 286             auth = None
 287         h = httplib.HTTP(host)
 288         if data is not None:
 289             h.putrequest('POST', selector)
 290             h.putheader('Content-type', 'application/x-www-form-urlencoded')
 291             h.putheader('Content-length', '%d' % len(data))
 292         else:
 293             h.putrequest('GET', selector)
 294         if auth: h.putheader('Authorization', 'Basic %s' % auth)
 295         if realhost: h.putheader('Host', realhost)
 296         for args in self.addheaders: h.putheader(*args)
 297         h.endheaders()
 298         if data is not None:
 299             h.send(data)
 300         errcode, errmsg, headers = h.getreply()
 301         fp = h.getfile()
 302         if errcode == 200:
 303             return addinfourl(fp, headers, "http:" + url)
 304         else:
 305             if data is None:
 306                 return self.http_error(url, fp, errcode, errmsg, headers)
 307             else:
 308                 return self.http_error(url, fp, errcode, errmsg, headers, data)
 309
 310     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
 311         """Handle http errors.
 312         Derived class can override this, or provide specific handlers
 313         named http_error_DDD where DDD is the 3-digit error code."""
 314         # First check if there's a specific handler for this error
 315         name = 'http_error_%d' % errcode
 316         if hasattr(self, name):
 317             method = getattr(self, name)
 318             if data is None:
 319                 result = method(url, fp, errcode, errmsg, headers)
 320             else:
 321                 result = method(url, fp, errcode, errmsg, headers, data)
 322             if result: return result
 323         return self.http_error_default(url, fp, errcode, errmsg, headers)
 324
 325     def http_error_default(self, url, fp, errcode, errmsg, headers):
 326         """Default error handler: close the connection and raise IOError."""
 327         void = fp.read()
 328         fp.close()
 329         raise IOError, ('http error', errcode, errmsg, headers)
 330
 331     if hasattr(socket, "ssl"):
 332         def open_https(self, url, data=None):
 333             """Use HTTPS protocol."""
 334             import httplib
 335             user_passwd = None
 336             if isinstance(url, str):
 337                 host, selector = splithost(url)
 338                 if host:
 339                     user_passwd, host = splituser(host)
 340                     host = unquote(host)
 341                 realhost = host
 342             else:
 343                 host, selector = url
 344                 urltype, rest = splittype(selector)
 345                 url = rest
 346                 user_passwd = None
 347                 if urltype.lower() != 'https':
 348                     realhost = None
 349                 else:
 350                     realhost, rest = splithost(rest)
 351                     if realhost:
 352                         user_passwd, realhost = splituser(realhost)
 353                     if user_passwd:
 354                         selector = "%s://%s%s" % (urltype, realhost, rest)
 355                 #print "proxy via https:", host, selector
 356             if not host: raise IOError, ('https error', 'no host given')
 357             if user_passwd:
 358                 import base64
 359                 auth = base64.encodestring(user_passwd).strip()
 360             else:
 361                 auth = None
 362             h = httplib.HTTPS(host, 0,
 363                               key_file=self.key_file,
 364                               cert_file=self.cert_file)
 365             if data is not None:
 366                 h.putrequest('POST', selector)
 367                 h.putheader('Content-type',
 368                             'application/x-www-form-urlencoded')
 369                 h.putheader('Content-length', '%d' % len(data))
 370             else:
 371                 h.putrequest('GET', selector)
 372             if auth: h.putheader('Authorization: Basic %s' % auth)
 373             if realhost: h.putheader('Host', realhost)
 374             for args in self.addheaders: h.putheader(*args)
 375             h.endheaders()
 376             if data is not None:
 377                 h.send(data)
 378             errcode, errmsg, headers = h.getreply()
 379             fp = h.getfile()
 380             if errcode == 200:
 381                 return addinfourl(fp, headers, "https:" + url)
 382             else:
 383                 if data is None:
 384                     return self.http_error(url, fp, errcode, errmsg, headers)
 385                 else:
 386                     return self.http_error(url, fp, errcode, errmsg, headers,
 387                                            data)
 388
 389     def open_gopher(self, url):
 390         """Use Gopher protocol."""
 391         import gopherlib
 392         host, selector = splithost(url)
 393         if not host: raise IOError, ('gopher error', 'no host given')
 394         host = unquote(host)
 395         type, selector = splitgophertype(selector)
 396         selector, query = splitquery(selector)
 397         selector = unquote(selector)
 398         if query:
 399             query = unquote(query)
 400             fp = gopherlib.send_query(selector, query, host)
 401         else:
 402             fp = gopherlib.send_selector(selector, host)
 403         return addinfourl(fp, noheaders(), "gopher:" + url)
 404
 405     def open_file(self, url):
 406         """Use local file or FTP depending on form of URL."""
 407         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
 408             return self.open_ftp(url)
 409         else:
 410             return self.open_local_file(url)
 411
 412     def open_local_file(self, url):
 413         """Use local file."""
 414         import mimetypes, mimetools, rfc822, StringIO
 415         host, file = splithost(url)
 416         localname = url2pathname(file)
 417         try:
 418             stats = os.stat(localname)
 419         except OSError, e:
 420             raise IOError(e.errno, e.strerror, e.filename)
 421         size = stats.st_size
 422         modified = rfc822.formatdate(stats.st_mtime)
 423         mtype = mimetypes.guess_type(url)[0]
 424         headers = mimetools.Message(StringIO.StringIO(
 425             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
 426             (mtype or 'text/plain', size, modified)))
 427         if not host:
 428             urlfile = file
 429             if file[:1] == '/':
 430                 urlfile = 'file://' + file
 431             return addinfourl(open(localname, 'rb'),
 432                               headers, urlfile)
 433         host, port = splitport(host)
 434         if not port \
 435            and socket.gethostbyname(host) in (localhost(), thishost()):
 436             urlfile = file
 437             if file[:1] == '/':
 438                 urlfile = 'file://' + file
 439             return addinfourl(open(localname, 'rb'),
 440                               headers, urlfile)
 441         raise IOError, ('local file error', 'not on local host')
 442
 443     def open_ftp(self, url):
 444         """Use FTP protocol."""
 445         import mimetypes, mimetools, StringIO
 446         host, path = splithost(url)
 447         if not host: raise IOError, ('ftp error', 'no host given')
 448         host, port = splitport(host)
 449         user, host = splituser(host)
 450         if user: user, passwd = splitpasswd(user)
 451         else: passwd = None
 452         host = unquote(host)
 453         user = unquote(user or '')
 454         passwd = unquote(passwd or '')
 455         host = socket.gethostbyname(host)
 456         if not port:
 457             import ftplib
 458             port = ftplib.FTP_PORT
 459         else:
 460             port = int(port)
 461         path, attrs = splitattr(path)
 462         path = unquote(path)
 463         dirs = path.split('/')
 464         dirs, file = dirs[:-1], dirs[-1]
 465         if dirs and not dirs[0]: dirs = dirs[1:]
 466         if dirs and not dirs[0]: dirs[0] = '/'
 467         key = user, host, port, '/'.join(dirs)
 468         # XXX thread unsafe!
 469         if len(self.ftpcache) > MAXFTPCACHE:
 470             # Prune the cache, rather arbitrarily
 471             for k in self.ftpcache.keys():
 472                 if k != key:
 473                     v = self.ftpcache[k]
 474                     del self.ftpcache[k]
 475                     v.close()
 476         try:
 477             if not key in self.ftpcache:
 478                 self.ftpcache[key] = \
 479                     ftpwrapper(user, passwd, host, port, dirs)
 480             if not file: type = 'D'
 481             else: type = 'I'
 482             for attr in attrs:
 483                 attr, value = splitvalue(attr)
 484                 if attr.lower() == 'type' and \
 485                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
 486                     type = value.upper()
 487             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
 488             mtype = mimetypes.guess_type("ftp:" + url)[0]
 489             headers = ""
 490             if mtype:
 491                 headers += "Content-Type: %s\n" % mtype
 492             if retrlen is not None and retrlen >= 0:
 493                 headers += "Content-Length: %d\n" % retrlen
 494             headers = mimetools.Message(StringIO.StringIO(headers))
 495             return addinfourl(fp, headers, "ftp:" + url)
 496         except ftperrors(), msg:
 497             raise IOError, ('ftp error', msg), sys.exc_info()[2]
 498
 499     def open_data(self, url, data=None):
 500         """Use "data" URL."""
 501         # ignore POSTed data
 502         #
 503         # syntax of data URLs:
 504         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 505         # mediatype := [ type "/" subtype ] *( ";" parameter )
 506         # data      := *urlchar
 507         # parameter := attribute "=" value
 508         import StringIO, mimetools
 509         try:
 510             [type, data] = url.split(',', 1)
 511         except ValueError:
 512             raise IOError, ('data error', 'bad data URL')
 513         if not type:
 514             type = 'text/plain;charset=US-ASCII'
 515         semi = type.rfind(';')
 516         if semi >= 0 and '=' not in type[semi:]:
 517             encoding = type[semi+1:]
 518             type = type[:semi]
 519         else:
 520             encoding = ''
 521         msg = []
 522         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
 523                                             time.gmtime(time.time())))
 524         msg.append('Content-type: %s' % type)
 525         if encoding == 'base64':
 526             import base64
 527             data = base64.decodestring(data)
 528         else:
 529             data = unquote(data)
 530         msg.append('Content-length: %d' % len(data))
 531         msg.append('')
 532         msg.append(data)
 533         msg = '\n'.join(msg)
 534         f = StringIO.StringIO(msg)
 535         headers = mimetools.Message(f, 0)
 536         f.fileno = None     # needed for addinfourl
 537         return addinfourl(f, headers, url)
 538
 539
 540 class FancyURLopener(URLopener):
 541     """Derived class with handlers for errors we can handle (perhaps)."""
 542
 543     def __init__(self, *args, **kwargs):
 544         URLopener.__init__(self, *args, **kwargs)
 545         self.auth_cache = {}
 546         self.tries = 0
 547         self.maxtries = 10
 548
 549     def http_error_default(self, url, fp, errcode, errmsg, headers):
 550         """Default error handling -- don't raise an exception."""
 551         return addinfourl(fp, headers, "http:" + url)
 552
 553     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
 554         """Error 302 -- relocated (temporarily)."""
 555         self.tries += 1
 556         if self.maxtries and self.tries >= self.maxtries:
 557             if hasattr(self, "http_error_500"):
 558                 meth = self.http_error_500
 559             else:
 560                 meth = self.http_error_default
 561             self.tries = 0
 562             return meth(url, fp, 500,
 563                         "Internal Server Error: Redirect Recursion", headers)
 564         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
 565                                         data)
 566         self.tries = 0
 567         return result
 568
 569     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
 570         if 'location' in headers:
 571             newurl = headers['location']
 572         elif 'uri' in headers:
 573             newurl = headers['uri']
 574         else:
 575             return
 576         void = fp.read()
 577         fp.close()
 578         # In case the server sent a relative URL, join with original:
 579         newurl = basejoin(self.type + ":" + url, newurl)
 580         if data is None:
 581             return self.open(newurl)
 582         else:
 583             return self.open(newurl, data)
 584
 585     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
 586         """Error 301 -- also relocated (permanently)."""
 587         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 588
 589     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
 590         """Error 401 -- authentication required.
 591         See this URL for a description of the basic authentication scheme:
 592         http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
 593         if not 'www-authenticate' in headers:
 594             URLopener.http_error_default(self, url, fp,
 595                                          errcode, errmsg, headers)
 596         stuff = headers['www-authenticate']
 597         import re
 598         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 599         if not match:
 600             URLopener.http_error_default(self, url, fp,
 601                                          errcode, errmsg, headers)
 602         scheme, realm = match.groups()
 603         if scheme.lower() != 'basic':
 604             URLopener.http_error_default(self, url, fp,
 605                                          errcode, errmsg, headers)
 606         name = 'retry_' + self.type + '_basic_auth'
 607         if data is None:
 608             return getattr(self,name)(url, realm)
 609         else:
 610             return getattr(self,name)(url, realm, data)
 611
 612     def retry_http_basic_auth(self, url, realm, data=None):
 613         host, selector = splithost(url)
 614         i = host.find('@') + 1
 615         host = host[i:]
 616         user, passwd = self.get_user_passwd(host, realm, i)
 617         if not (user or passwd): return None
 618         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 619         newurl = 'http://' + host + selector
 620         if data is None:
 621             return self.open(newurl)
 622         else:
 623             return self.open(newurl, data)
 624
 625     def retry_https_basic_auth(self, url, realm, data=None):
 626         host, selector = splithost(url)
 627         i = host.find('@') + 1
 628         host = host[i:]
 629         user, passwd = self.get_user_passwd(host, realm, i)
 630         if not (user or passwd): return None
 631         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 632         newurl = '//' + host + selector
 633         return self.open_https(newurl, data)
 634
 635     def get_user_passwd(self, host, realm, clear_cache = 0):
 636         key = realm + '@' + host.lower()
 637         if key in self.auth_cache:
 638             if clear_cache:
 639                 del self.auth_cache[key]
 640             else:
 641                 return self.auth_cache[key]
 642         user, passwd = self.prompt_user_passwd(host, realm)
 643         if user or passwd: self.auth_cache[key] = (user, passwd)
 644         return user, passwd
 645
 646     def prompt_user_passwd(self, host, realm):
 647         """Override this in a GUI environment!"""
 648         import getpass
 649         try:
 650             user = raw_input("Enter username for %s at %s: " % (realm,
 651                                                                 host))
 652             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
 653                 (user, realm, host))
 654             return user, passwd
 655         except KeyboardInterrupt:
 656             print
 657             return None, None
 658
 659
 660 # Utility functions
 661
 662 _localhost = None
 663 def localhost():
 664     """Return the IP address of the magic hostname 'localhost'."""
 665     global _localhost
 666     if _localhost is None:
 667         _localhost = socket.gethostbyname('localhost')
 668     return _localhost
 669
 670 _thishost = None
 671 def thishost():
 672     """Return the IP address of the current host."""
 673     global _thishost
 674     if _thishost is None:
 675         _thishost = socket.gethostbyname(socket.gethostname())
 676     return _thishost
 677
 678 _ftperrors = None
 679 def ftperrors():
 680     """Return the set of errors raised by the FTP class."""
 681     global _ftperrors
 682     if _ftperrors is None:
 683         import ftplib
 684         _ftperrors = ftplib.all_errors
 685     return _ftperrors
 686
 687 _noheaders = None
 688 def noheaders():
 689     """Return an empty mimetools.Message object."""
 690     global _noheaders
 691     if _noheaders is None:
 692         import mimetools
 693         import StringIO
 694         _noheaders = mimetools.Message(StringIO.StringIO(), 0)
 695         _noheaders.fp.close()   # Recycle file descriptor
 696     return _noheaders
 697
 698
 699 # Utility classes
 700
 701 class ftpwrapper:
 702     """Class used by open_ftp() for cache of open FTP connections."""
 703
 704     def __init__(self, user, passwd, host, port, dirs):
 705         self.user = user
 706         self.passwd = passwd
 707         self.host = host
 708         self.port = port
 709         self.dirs = dirs
 710         self.init()
 711
 712     def init(self):
 713         import ftplib
 714         self.busy = 0
 715         self.ftp = ftplib.FTP()
 716         self.ftp.connect(self.host, self.port)
 717         self.ftp.login(self.user, self.passwd)
 718         for dir in self.dirs:
 719             self.ftp.cwd(dir)
 720
    def retrfile(self, file, type):
        """Start retrieving `file` and return (file object, length).

        `type` is the argument for the FTP TYPE command; 'd' or 'D' asks
        for a directory listing and is sent as 'TYPE A'.

        Any transfer still in progress is finished first.  If the TYPE
        command fails with any ftplib error, the connection is rebuilt
        with init() and the command is sent once more.

        For a non-directory request with a file name, NLST is used to
        check that the file exists and then RETR is attempted.  A
        permanent error from NLST, or a permanent error from RETR whose
        text does not start with '550', is re-raised as
        IOError('ftp error', reason) with the original traceback.  A 550
        reply to RETR falls through to a directory listing (LIST) in
        ASCII mode, as does an empty file name or a directory request.

        The returned file object is wrapped in addclosehook so that
        closing it calls self.endtransfer(); the second element is
        conn[1] from ntransfercmd().
        """
        import ftplib
        # Drain the response of a previous transfer, if one is pending.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed; reconnect and retry once.  A second
            # failure propagates to the caller.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A reply starting with 550 is swallowed here so that the
                # LIST fallback below is tried; anything else is fatal.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        # Mark the control connection as having a response outstanding;
        # endtransfer() clears this again.
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
 758     def endtransfer(self):
 759         if not self.busy:
 760             return
 761         self.busy = 0
 762         try:
 763             self.ftp.voidresp()
 764         except ftperrors():
 765             pass
 766
 767     def close(self):
 768         self.endtransfer()
 769         try:
 770             self.ftp.close()
 771         except ftperrors():
 772             pass
 773
 774 class addbase:
 775     """Base class for addinfo and addclosehook."""
 776
 777     def __init__(self, fp):
 778         self.fp = fp
 779         self.read = self.fp.read
 780         self.readline = self.fp.readline
 781         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
 782         if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno
 783         if hasattr(self.fp, "__iter__"):
 784             self.__iter__ = self.fp.__iter__
 785             if hasattr(self.fp, "next"):
 786                 self.next = self.fp.next
 787
 788     def __repr__(self):
 789         return '<%s at %s whose fp = %s>' % (self.__class__.__name__,
 790                                              `id(self)`, `self.fp`)
 791
 792     def close(self):
 793         self.read = None
 794         self.readline = None
 795         self.readlines = None
 796         self.fileno = None
 797         if self.fp: self.fp.close()
 798         self.fp = None
 799
 800 class addclosehook(addbase):
 801     """Class to add a close hook to an open file."""
 802
 803     def __init__(self, fp, closehook, *hookargs):
 804         addbase.__init__(self, fp)
 805         self.closehook = closehook
 806         self.hookargs = hookargs
 807
 808     def close(self):
 809         addbase.close(self)
 810         if self.closehook:
 811             self.closehook(*self.hookargs)
 812             self.closehook = None
 813             self.hookargs = None
 814
 815 class addinfo(addbase):
 816     """class to add an info() method to an open file."""
 817
 818     def __init__(self, fp, headers):
 819         addbase.__init__(self, fp)
 820         self.headers = headers
 821
 822     def info(self):
 823         return self.headers
 824
 825 class addinfourl(addbase):
 826     """class to add info() and geturl() methods to an open file."""
 827
 828     def __init__(self, fp, headers, url):
 829         addbase.__init__(self, fp)
 830         self.headers = headers
 831         self.url = url
 832
 833     def info(self):
 834         return self.headers
 835
 836     def geturl(self):
 837         return self.url
 838
 839
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    If `url` has its own type (scheme) it is returned unchanged.  If it
    has a host part, only the type is inherited from `base`.  Otherwise
    both type and host come from `base`; the base's tag and query are
    dropped, and a non-absolute path in `url` is resolved against the
    base path, consuming leading '../' components.
    """
    type, path = splittype(url)
    if type:
        # if url is complete (i.e., it contains a type), return it
        return url
    host, path = splithost(path)
    type, basepath = splittype(base) # inherit type from base
    if host:
        # if url contains host, just inherit type
        if type: return type + '://' + host + path
        else:
            # no type inherited, so url must have started with //
            # just return it
            return url
    host, basepath = splithost(basepath) # inherit host
    # The base's own tag and query are split off and never used again.
    basepath, basetag = splittag(basepath) # remove extraneous cruft
    basepath, basequery = splitquery(basepath) # idem
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            # (i is past the end, so the slice below keeps all of basepath)
            i = len(basepath)
        else:
            # else replace last component
            i = basepath.rfind('/')
        if i < 0:
            # basepath not absolute
            if host:
                # host present, make absolute
                basepath = '/'
            else:
                # else keep non-absolute
                basepath = ''
        else:
            # remove last file component
            basepath = basepath[:i+1]
        # Interpret ../ (important because of symlinks)
        # Each pass strips one leading '../' from path and one trailing
        # directory from basepath; the loop ends when basepath is empty,
        # when the root is reached, or when path has no more '../'.
        while basepath and path[:3] == '../':
            path = path[3:]
            i = basepath[:-1].rfind('/')
            if i > 0:
                basepath = basepath[:i+1]
            elif i == 0:
                # Reached the root; stop, leaving remaining '../' in path.
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    # With a host the path must be absolute.
    if host and path and path[0] != '/':
        path = '/' + path
    if type and host: return type + '://' + host + path
    elif type: return type + ':' + path
    elif host: return '//' + host + path # don't know what this means
    else: return path
 896
 897
 898 # Utilities to parse URLs (most of these return None for missing parts):
 899 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 900 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 901 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 902 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
 903 # splitpasswd('user:passwd') -> 'user', 'passwd'
 904 # splitport('host:port') --> 'host', 'port'
 905 # splitquery('/path?query') --> '/path', 'query'
 906 # splittag('/path#tag') --> '/path', 'tag'
 907 # splitattr('/path;attr1=value1;attr2=value2;...') ->
 908 #   '/path', ['attr1=value1', 'attr2=value2', ...]
 909 # splitvalue('attr=value') --> 'attr', 'value'
 910 # splitgophertype('/Xselector') --> 'X', 'selector'
 911 # unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
 913
 914 try:
 915     unicode
 916 except NameError:
 917     def _is_unicode(x):
 918         return 0
 919 else:
 920     def _is_unicode(x):
 921         return isinstance(x, unicode)
 922
 923 def toBytes(url):
 924     """toBytes(u"URL") --> 'URL'."""
 925     # Most URL schemes require ASCII. If that changes, the conversion
 926     # can be relaxed
 927     if _is_unicode(url):
 928         try:
 929             url = url.encode("ASCII")
 930         except UnicodeError:
 931             raise UnicodeError("URL " + repr(url) +
 932                                " contains non-ASCII characters")
 933     return url
 934
 935 def unwrap(url):
 936     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
 937     url = url.strip()
 938     if url[:1] == '<' and url[-1:] == '>':
 939         url = url[1:-1].strip()
 940     if url[:4] == 'URL:': url = url[4:].strip()
 941     return url
 942
 943 _typeprog = None
 944 def splittype(url):
 945     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
 946     global _typeprog
 947     if _typeprog is None:
 948         import re
 949         _typeprog = re.compile('^([^/:]+):')
 950
 951     match = _typeprog.match(url)
 952     if match:
 953         scheme = match.group(1)
 954         return scheme.lower(), url[len(scheme) + 1:]
 955     return None, url
 956
 957 _hostprog = None
 958 def splithost(url):
 959     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
 960     global _hostprog
 961     if _hostprog is None:
 962         import re
 963         _hostprog = re.compile('^//([^/]*)(.*)$')
 964
 965     match = _hostprog.match(url)
 966     if match: return match.group(1, 2)
 967     return None, url
 968
 969 _userprog = None
 970 def splituser(host):
 971     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
 972     global _userprog
 973     if _userprog is None:
 974         import re
 975         _userprog = re.compile('^(.*)@(.*)$')
 976
 977     match = _userprog.match(host)
 978     if match: return map(unquote, match.group(1, 2))
 979     return None, host
 980
 981 _passwdprog = None
 982 def splitpasswd(user):
 983     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
 984     global _passwdprog
 985     if _passwdprog is None:
 986         import re
 987         _passwdprog = re.compile('^([^:]*):(.*)$')
 988
 989     match = _passwdprog.match(user)
 990     if match: return match.group(1, 2)
 991     return user, None
 992
# splitport('host:port') --> 'host', 'port'
 994 _portprog = None
 995 def splitport(host):
 996     """splitport('host:port') --> 'host', 'port'."""
 997     global _portprog
 998     if _portprog is None:
 999         import re
1000         _portprog = re.compile('^(.*):([0-9]+)$')
1001
1002     match = _portprog.match(host)
1003     if match: return match.group(1, 2)
1004     return host, None
1005
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # re is imported and the pattern compiled on first use only.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if not match:
        return host, defport
    host, port = match.group(1, 2)
    # An empty port (trailing ':') or one int() rejects yields None.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
1027
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'.  Returns (url, None) when url
    contains no query part.
    """
    global _queryprog
    if _queryprog is None:
        # re is imported and the pattern compiled on first use only.
        import re
        # Raw string: '\?' is not a valid string escape, so the pattern
        # is spelled as a raw literal (same value as before).
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
1039
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'.  Returns (url, None) when url
    contains no tag.
    """
    global _tagprog
    if _tagprog is None:
        # re is imported and the pattern compiled on first use only.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1), found.group(2)
1051
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    attrs = url.split(';')
    # The first piece is the path; whatever remains are the attributes.
    path = attrs.pop(0)
    return path, attrs
1057
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='.  Returns (attr, None) when
    there is no '=' to split on.
    """
    global _valueprog
    if _valueprog is None:
        # re is imported and the pattern compiled on first use only.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1), found.group(2)
1069
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector starts with '/' followed
    by at least one more character.
    """
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    gtype, rest = selector[1], selector[2:]
    return gtype, rest
1075
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  A '%' not followed by two hex digits
    is left in the output unchanged.
    """
    hexdigits = '0123456789ABCDEFabcdef'
    pieces = s.split('%')
    # Everything before the first '%' is copied verbatim.
    res = [pieces[0]]
    append = res.append
    for item in pieces[1:]:
        code = item[:2]
        # Check the digits explicitly: int() alone would also accept
        # forms such as '+1' or ' a', which are not valid escapes.
        if (len(code) == 2 and code[0] in hexdigits
                and code[1] in hexdigits):
            append(chr(int(code, 16)) + item[2:])
        else:
            append('%' + item)
    return "".join(res)
1094
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    # Translate '+' to ' ' before decoding the %xx escapes.
    spaced = s.replace('+', ' ')
    return unquote(spaced)
1101
# Characters that are never quoted, whatever `safe` set is requested.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# The safe set produced by quote()'s default arguments, and the lookup
# table built from it on first use by _fast_quote().
_fast_safe_test = always_safe + '/'
_fast_safe = None

def _fast_quote(s):
    """Quote s using the default safe set (always_safe plus '/').

    Every character outside that set is replaced by '%XX', where XX is
    the upper-case hexadecimal code of the character.
    """
    global _fast_safe
    if _fast_safe is None:
        # Build the table completely before publishing it, so that no
        # other caller can ever look things up in a half-filled table.
        table = {}
        for c in _fast_safe_test:
            table[c] = c
        _fast_safe = table
    safe = _fast_safe
    res = []
    for c in s:
        if c in safe:
            res.append(c)
        else:
            res.append('%%%02X' % ord(c))
    return ''.join(res)
1121
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Replace every character of s that is neither in always_safe nor in
    `safe` by '%XX', where XX is its upper-case hexadecimal code.

    Which characters need quoting depends on the part of the URL being
    built (path, query, ...).  RFC 2396 Uniform Resource Identifiers
    (URI): Generic Syntax lists the following reserved characters:

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of them is reserved in some component of a URL, but not
    necessarily in all of them.

    The default `safe` value targets the path section of a URL, so '/'
    is left alone: in a path the existing slashes are normally meant as
    separators and must not be encoded.
    """
    allowed = always_safe + safe
    if allowed == _fast_safe_test:
        # Default safe set: use the dictionary-based fast path.
        return _fast_quote(s)
    pieces = []
    for ch in s:
        if ch in allowed:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)
1152
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Everything between the spaces is quoted with quote(), using the
    given `safe` characters.
    """
    if ' ' not in s:
        return quote(s, safe)
    # Quote each space-separated word on its own, then glue the words
    # back together with '+' standing in for the spaces.
    return '+'.join([quote(word, safe) for word in s.split(' ')])
1162
1163 def urlencode(query,doseq=0):
1164     """Encode a sequence of two-element tuples or dictionary into a URL query string.
1165
1166     If any values in the query arg are sequences and doseq is true, each
1167     sequence element is converted to a separate parameter.
1168
1169     If the query arg is a sequence of two-element tuples, the order of the
1170     parameters in the output will match the order of parameters in the
1171     input.
1172     """
1173
1174     if hasattr(query,"items"):
1175         # mapping objects
1176         query = query.items()
1177     else:
1178         # it's a bother at times that strings and string-like objects are
1179         # sequences...
1180         try:
1181             # non-sequence items should not work with len()
1182             # non-empty strings will fail this
1183             if len(query) and not isinstance(query[0], tuple):
1184                 raise TypeError
1185             # zero-length sequences of all types will get here and succeed,
1186             # but that's a minor nit - since the original implementation
1187             # allowed empty dicts that type of behavior probably should be
1188             # preserved for consistency
1189         except TypeError:
1190             ty,va,tb = sys.exc_info()
1191             raise TypeError, "not a valid non-string sequence or mapping object", tb
1192
1193     l = []
1194     if not doseq:
1195         # preserve old behavior
1196         for k, v in query:
1197             k = quote_plus(str(k))
1198             v = quote_plus(str(v))
1199             l.append(k + '=' + v)
1200     else:
1201         for k, v in query:
1202             k = quote_plus(str(k))
1203             if isinstance(v, str):
1204                 v = quote_plus(v)
1205                 l.append(k + '=' + v)
1206             elif _is_unicode(v):
1207                 # is there a reasonable way to convert to ASCII?
1208                 # encode generates a string, but "replace" or "ignore"
1209                 # lose information and "strict" can raise UnicodeError
1210                 v = quote_plus(v.encode("ASCII","replace"))
1211                 l.append(k + '=' + v)
1212             else:
1213                 try:
1214                     # is this a sufficient test for sequence-ness?
1215                     x = len(v)
1216                 except TypeError:
1217                     # not a sequence
1218                     v = quote_plus(str(v))
1219                     l.append(k + '=' + v)
1220                 else:
1221                     # loop over the sequence
1222                     for elt in v:
1223                         l.append(k + '=' + quote_plus(str(elt)))
1224     return '&'.join(l)
1225
1226 # Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Every non-empty environment variable whose lower-cased name ends in
    '_proxy' contributes one entry, keyed by the part of the name in
    front of that suffix.  If you need a different way, you can pass a
    proxies dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            proxies[name[:-len(suffix)]] = value
    return proxies
1242
1243 if os.name == 'mac':
1244     def getproxies():
1245         """Return a dictionary of scheme -> proxy server URL mappings.
1246
1247         By convention the mac uses Internet Config to store
1248         proxies.  An HTTP proxy, for instance, is stored under
1249         the HttpProxy key.
1250
1251         """
1252         try:
1253             import ic
1254         except ImportError:
1255             return {}
1256
1257         try:
1258             config = ic.IC()
1259         except ic.error:
1260             return {}
1261         proxies = {}
1262         # HTTP:
1263         if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1264             try:
1265                 value = config['HTTPProxyHost']
1266             except ic.error:
1267                 pass
1268             else:
1269                 proxies['http'] = 'http://%s' % value
1270         # FTP: XXXX To be done.
1271         # Gopher: XXXX To be done.
1272         return proxies
1273
1274     def proxy_bypass(x):
1275         return 0
1276
1277 elif os.name == 'nt':
1278     def getproxies_registry():
1279         """Return a dictionary of scheme -> proxy server URL mappings.
1280
1281         Win32 uses the registry to store proxies.
1282
1283         """
1284         proxies = {}
1285         try:
1286             import _winreg
1287         except ImportError:
1288             # Std module, so should be around - but you never know!
1289             return proxies
1290         try:
1291             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1292                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1293             proxyEnable = _winreg.QueryValueEx(internetSettings,
1294                                                'ProxyEnable')[0]
1295             if proxyEnable:
1296                 # Returned as Unicode but problems if not converted to ASCII
1297                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1298                                                        'ProxyServer')[0])
1299                 if '=' in proxyServer:
1300                     # Per-protocol settings
1301                     for p in proxyServer.split(';'):
1302                         protocol, address = p.split('=', 1)
1303                         # See if address has a type:// prefix
1304                         import re
1305                         if not re.match('^([^/:]+)://', address):
1306                             address = '%s://%s' % (protocol, address)
1307                         proxies[protocol] = address
1308                 else:
1309                     # Use one setting for all protocols
1310                     if proxyServer[:5] == 'http:':
1311                         proxies['http'] = proxyServer
1312                     else:
1313                         proxies['http'] = 'http://%s' % proxyServer
1314                         proxies['ftp'] = 'ftp://%s' % proxyServer
1315             internetSettings.Close()
1316         except (WindowsError, ValueError, TypeError):
1317             # Either registry key not found etc, or the value in an
1318             # unexpected format.
1319             # proxies already set up to be empty so nothing to do
1320             pass
1321         return proxies
1322
1323     def getproxies():
1324         """Return a dictionary of scheme -> proxy server URL mappings.
1325
1326         Returns settings gathered from the environment, if specified,
1327         or the registry.
1328
1329         """
1330         return getproxies_environment() or getproxies_registry()
1331
1332     def proxy_bypass(host):
1333         try:
1334             import _winreg
1335             import re
1336         except ImportError:
1337             # Std modules, so should be around - but you never know!
1338             return 0
1339         try:
1340             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1341                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1342             proxyEnable = _winreg.QueryValueEx(internetSettings,
1343                                                'ProxyEnable')[0]
1344             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1345                                                      'ProxyOverride')[0])
1346             # ^^^^ Returned as Unicode but problems if not converted to ASCII
1347         except WindowsError:
1348             return 0
1349         if not proxyEnable or not proxyOverride:
1350             return 0
1351         # try to make a host list from name and IP address.
1352         host = [host]
1353         try:
1354             addr = socket.gethostbyname(host[0])
1355             if addr != host:
1356                 host.append(addr)
1357         except socket.error:
1358             pass
1359         # make a check value list from the registry entry: replace the
1360         # '<local>' string by the localhost entry and the corresponding
1361         # canonical entry.
1362         proxyOverride = proxyOverride.split(';')
1363         i = 0
1364         while i < len(proxyOverride):
1365             if proxyOverride[i] == '<local>':
1366                 proxyOverride[i:i+1] = ['localhost',
1367                                         '127.0.0.1',
1368                                         socket.gethostname(),
1369                                         socket.gethostbyname(
1370                                             socket.gethostname())]
1371             i += 1
1372         # print proxyOverride
1373         # now check if we match one of the registry values.
1374         for test in proxyOverride:
1375             test = test.replace(".", r"\.")     # mask dots
1376             test = test.replace("*", r".*")     # change glob sequence
1377             test = test.replace("?", r".")      # change glob char
1378             for val in host:
1379                 # print "%s <--> %s" %( test, val )
1380                 if re.match(test, val, re.I):
1381                     return 1
1382         return 0
1383
1384 else:
1385     # By default use environment variables
1386     getproxies = getproxies_environment
1387
1388     def proxy_bypass(host):
1389         return 0
1390
1391 # Test and time quote() and unquote()
1392 def test1():
1393     s = ''
1394     for i in range(256): s = s + chr(i)
1395     s = s*4
1396     t0 = time.time()
1397     qs = quote(s)
1398     uqs = unquote(qs)
1399     t1 = time.time()
1400     if uqs != s:
1401         print 'Wrong!'
1402     print `s`
1403     print `qs`
1404     print `uqs`
1405     print round(t1 - t0, 3), 'sec'
1406
1407
1408 def reporthook(blocknum, blocksize, totalsize):
1409     # Report during remote transfers
1410     print "Block number: %d, Block size: %d, Total size: %d" % (
1411         blocknum, blocksize, totalsize)
1412
1413 # Test program
1414 def test(args=[]):
1415     if not args:
1416         args = [
1417             '/etc/passwd',
1418             'file:/etc/passwd',
1419             'file://localhost/etc/passwd',
1420             'ftp://ftp.python.org/pub/python/README',
1421 ##          'gopher://gopher.micro.umn.edu/1/',
1422             'http://www.python.org/index.html',
1423             ]
1424         if hasattr(URLopener, "open_https"):
1425             args.append('https://synergy.as.cmu.edu/~geek/')
1426     try:
1427         for url in args:
1428             print '-'*10, url, '-'*10
1429             fn, h = urlretrieve(url, None, reporthook)
1430             print fn
1431             if h:
1432                 print '======'
1433                 for k in h.keys(): print k + ':', h[k]
1434                 print '======'
1435             fp = open(fn, 'rb')
1436             data = fp.read()
1437             del fp
1438             if '\r' in data:
1439                 table = string.maketrans("", "")
1440                 data = data.translate(table, "\r")
1441             print data
1442             fn, h = None, None
1443         print '-'*40
1444     finally:
1445         urlcleanup()
1446
1447 def main():
1448     import getopt, sys
1449     try:
1450         opts, args = getopt.getopt(sys.argv[1:], "th")
1451     except getopt.error, msg:
1452         print msg
1453         print "Use -h for help"
1454         return
1455     t = 0
1456     for o, a in opts:
1457         if o == '-t':
1458             t = t + 1
1459         if o == '-h':
1460             print "Usage: python urllib.py [-t] [url ...]"
1461             print "-t runs self-test;",
1462             print "otherwise, contents of urls are printed"
1463             return
1464     if t:
1465         if t > 1:
1466             test1()
1467         test(args)
1468     else:
1469         if not args:
1470             print "Use -h for help"
1471         for url in args:
1472             print urlopen(url).read(),
1473
# Command-line entry point: when the module is run as a script, hand
# control to main(), which either runs the self-tests (-t) or prints the
# contents of the URLs named on the command line.
if __name__ == '__main__':
    main()