Lib/urllib.py

   1 """Open an arbitrary URL.
   2
   3 See the following document for more info on URLs:
   4 "Names and Addresses, URIs, URLs, URNs, URCs", at
   5 http://www.w3.org/pub/WWW/Addressing/Overview.html
   6
   7 See also the HTTP spec (from which the error codes are derived):
   8 "HTTP - Hypertext Transfer Protocol", at
   9 http://www.w3.org/pub/WWW/Protocols/
  10
  11 Related standards and specs:
  12 - RFC1808: the "relative URL" spec. (authoritative status)
  13 - RFC1738 - the "URL standard". (authoritative status)
  14 - RFC1630 - the "URI spec". (informational status)
  15
  16 The object returned by URLopener().open(file) will differ per
  17 protocol.  All you know is that it has methods read(), readline(),
  18 readlines(), fileno(), close() and info().  The read*(), fileno()
  19 and close() methods work like those of open files.
  20 The info() method returns a mimetools.Message object which can be
  21 used to query various info about the object, if available.
  22 (mimetools.Message objects are queried with the getheader() method.)
  23 """
  24
  25 import string
  26 import socket
  27 import os
  28 import time
  29 import sys
  30
  31 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
  32            "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
  33            "urlencode", "url2pathname", "pathname2url", "splittag",
  34            "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
  35            "splittype", "splithost", "splituser", "splitpasswd", "splitport",
  36            "splitnport", "splitquery", "splitattr", "splitvalue",
  37            "splitgophertype", "getproxies"]
  38
  39 __version__ = '1.15'    # XXX This version is not always updated :-(
  40
  41 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
  42
  43 # Helper for non-unix systems
# Pick the url2pathname()/pathname2url() pair for this platform: three
# platforms import theirs from a dedicated module, every other platform
# uses the quote()/unquote() based fallbacks defined here.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """Return pathname with unquote() applied."""
        return unquote(pathname)
    def pathname2url(pathname):
        """Return pathname with quote() applied."""
        return quote(pathname)
  55
  56 # This really consists of two pieces:
  57 # (1) a class which handles opening of all sorts of URLs
  58 #     (plus assorted utilities etc.)
  59 # (2) a set of functions for parsing URLs
  60 # XXX Should these be separated out into different modules?
  61
  62
  63 # Shortcut for basic usage
  64 _urlopener = None
  65 def urlopen(url, data=None, proxies=None):
  66     """urlopen(url [, data]) -> open file-like object"""
  67     global _urlopener
  68     if proxies is not None:
  69         opener = FancyURLopener(proxies=proxies)
  70     elif not _urlopener:
  71         opener = FancyURLopener()
  72         _urlopener = opener
  73     else:
  74         opener = _urlopener
  75     if data is None:
  76         return opener.open(url)
  77     else:
  78         return opener.open(url, data)
  79 def urlretrieve(url, filename=None, reporthook=None, data=None):
  80     global _urlopener
  81     if not _urlopener:
  82         _urlopener = FancyURLopener()
  83     return _urlopener.retrieve(url, filename, reporthook, data)
  84 def urlcleanup():
  85     if _urlopener:
  86         _urlopener.cleanup()
  87
  88
  89 ftpcache = {}
  90 class URLopener:
  91     """Class to open URLs.
  92     This is a class rather than just a subroutine because we may need
  93     more than one set of global protocol-specific options.
  94     Note -- this is a base class for those who don't want the
  95     automatic handling of errors type 302 (relocated) and 401
  96     (authorization needed)."""
  97
  98     __tempfiles = None
  99
 100     version = "Python-urllib/%s" % __version__
 101
 102     # Constructor
 103     def __init__(self, proxies=None, **x509):
 104         if proxies is None:
 105             proxies = getproxies()
 106         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
 107         self.proxies = proxies
 108         self.key_file = x509.get('key_file')
 109         self.cert_file = x509.get('cert_file')
 110         self.addheaders = [('User-agent', self.version)]
 111         self.__tempfiles = []
 112         self.__unlink = os.unlink # See cleanup()
 113         self.tempcache = None
 114         # Undocumented feature: if you assign {} to tempcache,
 115         # it is used to cache files retrieved with
 116         # self.retrieve().  This is not enabled by default
 117         # since it does not work for changing documents (and I
 118         # haven't got the logic to check expiration headers
 119         # yet).
 120         self.ftpcache = ftpcache
 121         # Undocumented feature: you can use a different
 122         # ftp cache by assigning to the .ftpcache member;
 123         # in case you want logically independent URL openers
 124         # XXX This is not threadsafe.  Bah.
 125
    def __del__(self):
        """Call close() when the opener is destroyed."""
        self.close()
 128
    def close(self):
        """Close the opener; this simply delegates to cleanup()."""
        self.cleanup()
 131
 132     def cleanup(self):
 133         # This code sometimes runs when the rest of this module
 134         # has already been deleted, so it can't use any globals
 135         # or import anything.
 136         if self.__tempfiles:
 137             for file in self.__tempfiles:
 138                 try:
 139                     self.__unlink(file)
 140                 except OSError:
 141                     pass
 142             del self.__tempfiles[:]
 143         if self.tempcache:
 144             self.tempcache.clear()
 145
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')

        The positional arguments are appended, as one tuple, to
        self.addheaders."""
        self.addheaders.append(args)
 150
 151     # External interface
 152     def open(self, fullurl, data=None):
 153         """Use URLopener().open(file) instead of open(file, 'r')."""
 154         fullurl = unwrap(toBytes(fullurl))
 155         if self.tempcache and fullurl in self.tempcache:
 156             filename, headers = self.tempcache[fullurl]
 157             fp = open(filename, 'rb')
 158             return addinfourl(fp, headers, fullurl)
 159         urltype, url = splittype(fullurl)
 160         if not urltype:
 161             urltype = 'file'
 162         if urltype in self.proxies:
 163             proxy = self.proxies[urltype]
 164             urltype, proxyhost = splittype(proxy)
 165             host, selector = splithost(proxyhost)
 166             url = (host, fullurl) # Signal special case to open_*()
 167         else:
 168             proxy = None
 169         name = 'open_' + urltype
 170         self.type = urltype
 171         if '-' in name:
 172             # replace - with _
 173             name = '_'.join(name.split('-'))
 174         if not hasattr(self, name):
 175             if proxy:
 176                 return self.open_unknown_proxy(proxy, fullurl, data)
 177             else:
 178                 return self.open_unknown(fullurl, data)
 179         try:
 180             if data is None:
 181                 return getattr(self, name)(url)
 182             else:
 183                 return getattr(self, name)(url, data)
 184         except socket.error, msg:
 185             raise IOError, ('socket error', msg), sys.exc_info()[2]
 186
 187     def open_unknown(self, fullurl, data=None):
 188         """Overridable interface to open unknown URL type."""
 189         type, url = splittype(fullurl)
 190         raise IOError, ('url error', 'unknown url type', type)
 191
 192     def open_unknown_proxy(self, proxy, fullurl, data=None):
 193         """Overridable interface to open unknown URL type."""
 194         type, url = splittype(fullurl)
 195         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
 196
 197     # External interface
 198     def retrieve(self, url, filename=None, reporthook=None, data=None):
 199         """retrieve(url) returns (filename, None) for a local object
 200         or (tempfilename, headers) for a remote object."""
 201         url = unwrap(toBytes(url))
 202         if self.tempcache and url in self.tempcache:
 203             return self.tempcache[url]
 204         type, url1 = splittype(url)
 205         if filename is None and (not type or type == 'file'):
 206             try:
 207                 fp = self.open_local_file(url1)
 208                 hdrs = fp.info()
 209                 del fp
 210                 return url2pathname(splithost(url1)[1]), hdrs
 211             except IOError, msg:
 212                 pass
 213         fp = self.open(url, data)
 214         headers = fp.info()
 215         if filename:
 216             tfp = open(filename, 'wb')
 217         else:
 218             import tempfile
 219             garbage, path = splittype(url)
 220             garbage, path = splithost(path or "")
 221             path, garbage = splitquery(path or "")
 222             path, garbage = splitattr(path or "")
 223             suffix = os.path.splitext(path)[1]
 224             (fd, filename) = tempfile.mkstemp(suffix)
 225             self.__tempfiles.append(filename)
 226             tfp = os.fdopen(fd, 'wb')
 227         result = filename, headers
 228         if self.tempcache is not None:
 229             self.tempcache[url] = result
 230         bs = 1024*8
 231         size = -1
 232         blocknum = 1
 233         if reporthook:
 234             if "content-length" in headers:
 235                 size = int(headers["Content-Length"])
 236             reporthook(0, bs, size)
 237         block = fp.read(bs)
 238         if reporthook:
 239             reporthook(1, bs, size)
 240         while block:
 241             tfp.write(block)
 242             block = fp.read(bs)
 243             blocknum = blocknum + 1
 244             if reporthook:
 245                 reporthook(blocknum, bs, size)
 246         fp.close()
 247         tfp.close()
 248         del fp
 249         del tfp
 250         return result
 251
 252     # Each method named open_<type> knows how to open that type of URL
 253
 254     def open_http(self, url, data=None):
 255         """Use HTTP protocol."""
 256         import httplib
 257         user_passwd = None
 258         if isinstance(url, str):
 259             host, selector = splithost(url)
 260             if host:
 261                 user_passwd, host = splituser(host)
 262                 host = unquote(host)
 263             realhost = host
 264         else:
 265             host, selector = url
 266             urltype, rest = splittype(selector)
 267             url = rest
 268             user_passwd = None
 269             if urltype.lower() != 'http':
 270                 realhost = None
 271             else:
 272                 realhost, rest = splithost(rest)
 273                 if realhost:
 274                     user_passwd, realhost = splituser(realhost)
 275                 if user_passwd:
 276                     selector = "%s://%s%s" % (urltype, realhost, rest)
 277                 if proxy_bypass(realhost):
 278                     host = realhost
 279
 280             #print "proxy via http:", host, selector
 281         if not host: raise IOError, ('http error', 'no host given')
 282         if user_passwd:
 283             import base64
 284             auth = base64.encodestring(user_passwd).strip()
 285         else:
 286             auth = None
 287         h = httplib.HTTP(host)
 288         if data is not None:
 289             h.putrequest('POST', selector)
 290             h.putheader('Content-type', 'application/x-www-form-urlencoded')
 291             h.putheader('Content-length', '%d' % len(data))
 292         else:
 293             h.putrequest('GET', selector)
 294         if auth: h.putheader('Authorization', 'Basic %s' % auth)
 295         if realhost: h.putheader('Host', realhost)
 296         for args in self.addheaders: apply(h.putheader, args)
 297         h.endheaders()
 298         if data is not None:
 299             h.send(data)
 300         errcode, errmsg, headers = h.getreply()
 301         fp = h.getfile()
 302         if errcode == 200:
 303             return addinfourl(fp, headers, "http:" + url)
 304         else:
 305             if data is None:
 306                 return self.http_error(url, fp, errcode, errmsg, headers)
 307             else:
 308                 return self.http_error(url, fp, errcode, errmsg, headers, data)
 309
 310     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
 311         """Handle http errors.
 312         Derived class can override this, or provide specific handlers
 313         named http_error_DDD where DDD is the 3-digit error code."""
 314         # First check if there's a specific handler for this error
 315         name = 'http_error_%d' % errcode
 316         if hasattr(self, name):
 317             method = getattr(self, name)
 318             if data is None:
 319                 result = method(url, fp, errcode, errmsg, headers)
 320             else:
 321                 result = method(url, fp, errcode, errmsg, headers, data)
 322             if result: return result
 323         return self.http_error_default(url, fp, errcode, errmsg, headers)
 324
 325     def http_error_default(self, url, fp, errcode, errmsg, headers):
 326         """Default error handler: close the connection and raise IOError."""
 327         void = fp.read()
 328         fp.close()
 329         raise IOError, ('http error', errcode, errmsg, headers)
 330
 331     if hasattr(socket, "ssl"):
 332         def open_https(self, url, data=None):
 333             """Use HTTPS protocol."""
 334             import httplib
 335             user_passwd = None
 336             if isinstance(url, str):
 337                 host, selector = splithost(url)
 338                 if host:
 339                     user_passwd, host = splituser(host)
 340                     host = unquote(host)
 341                 realhost = host
 342             else:
 343                 host, selector = url
 344                 urltype, rest = splittype(selector)
 345                 url = rest
 346                 user_passwd = None
 347                 if urltype.lower() != 'https':
 348                     realhost = None
 349                 else:
 350                     realhost, rest = splithost(rest)
 351                     if realhost:
 352                         user_passwd, realhost = splituser(realhost)
 353                     if user_passwd:
 354                         selector = "%s://%s%s" % (urltype, realhost, rest)
 355                 #print "proxy via https:", host, selector
 356             if not host: raise IOError, ('https error', 'no host given')
 357             if user_passwd:
 358                 import base64
 359                 auth = base64.encodestring(user_passwd).strip()
 360             else:
 361                 auth = None
 362             h = httplib.HTTPS(host, 0,
 363                               key_file=self.key_file,
 364                               cert_file=self.cert_file)
 365             if data is not None:
 366                 h.putrequest('POST', selector)
 367                 h.putheader('Content-type',
 368                             'application/x-www-form-urlencoded')
 369                 h.putheader('Content-length', '%d' % len(data))
 370             else:
 371                 h.putrequest('GET', selector)
 372             if auth: h.putheader('Authorization: Basic %s' % auth)
 373             if realhost: h.putheader('Host', realhost)
 374             for args in self.addheaders: apply(h.putheader, args)
 375             h.endheaders()
 376             if data is not None:
 377                 h.send(data)
 378             errcode, errmsg, headers = h.getreply()
 379             fp = h.getfile()
 380             if errcode == 200:
 381                 return addinfourl(fp, headers, "https:" + url)
 382             else:
 383                 if data is None:
 384                     return self.http_error(url, fp, errcode, errmsg, headers)
 385                 else:
 386                     return self.http_error(url, fp, errcode, errmsg, headers,
 387                                            data)
 388
 389     def open_gopher(self, url):
 390         """Use Gopher protocol."""
 391         import gopherlib
 392         host, selector = splithost(url)
 393         if not host: raise IOError, ('gopher error', 'no host given')
 394         host = unquote(host)
 395         type, selector = splitgophertype(selector)
 396         selector, query = splitquery(selector)
 397         selector = unquote(selector)
 398         if query:
 399             query = unquote(query)
 400             fp = gopherlib.send_query(selector, query, host)
 401         else:
 402             fp = gopherlib.send_selector(selector, host)
 403         return addinfourl(fp, noheaders(), "gopher:" + url)
 404
 405     def open_file(self, url):
 406         """Use local file or FTP depending on form of URL."""
 407         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
 408             return self.open_ftp(url)
 409         else:
 410             return self.open_local_file(url)
 411
 412     def open_local_file(self, url):
 413         """Use local file."""
 414         import mimetypes, mimetools, rfc822, StringIO
 415         host, file = splithost(url)
 416         localname = url2pathname(file)
 417         try:
 418             stats = os.stat(localname)
 419         except OSError, e:
 420             raise IOError(e.errno, e.strerror, e.filename)
 421         size = stats.st_size
 422         modified = rfc822.formatdate(stats.st_mtime)
 423         mtype = mimetypes.guess_type(url)[0]
 424         headers = mimetools.Message(StringIO.StringIO(
 425             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
 426             (mtype or 'text/plain', size, modified)))
 427         if not host:
 428             urlfile = file
 429             if file[:1] == '/':
 430                 urlfile = 'file://' + file
 431             return addinfourl(open(localname, 'rb'),
 432                               headers, urlfile)
 433         host, port = splitport(host)
 434         if not port \
 435            and socket.gethostbyname(host) in (localhost(), thishost()):
 436             urlfile = file
 437             if file[:1] == '/':
 438                 urlfile = 'file://' + file
 439             return addinfourl(open(localname, 'rb'),
 440                               headers, urlfile)
 441         raise IOError, ('local file error', 'not on local host')
 442
    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are kept in self.ftpcache, keyed on
        (user, host, port, directory path), and reused.  Returns an
        addinfourl object; errors listed by ftperrors() are re-raised as
        IOError('ftp error', msg).  Raises IOError('ftp error',
        'no host given') when url has no host.
        """
        import mimetypes, mimetools, StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        # Split the path into the directories to change into and the
        # final component to fetch.
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # Drop the empty entry produced by the leading '/'; a second empty
        # entry (path began with '//') is turned into the root directory.
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing when no file name
            # was given, binary ('I') otherwise.
            if not file: type = 'D'
            else: type = 'I'
            # A ";type=<a|i|d>" URL attribute overrides the default.
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO.StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
 498
 499     def open_data(self, url, data=None):
 500         """Use "data" URL."""
 501         # ignore POSTed data
 502         #
 503         # syntax of data URLs:
 504         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 505         # mediatype := [ type "/" subtype ] *( ";" parameter )
 506         # data      := *urlchar
 507         # parameter := attribute "=" value
 508         import StringIO, mimetools
 509         try:
 510             [type, data] = url.split(',', 1)
 511         except ValueError:
 512             raise IOError, ('data error', 'bad data URL')
 513         if not type:
 514             type = 'text/plain;charset=US-ASCII'
 515         semi = type.rfind(';')
 516         if semi >= 0 and '=' not in type[semi:]:
 517             encoding = type[semi+1:]
 518             type = type[:semi]
 519         else:
 520             encoding = ''
 521         msg = []
 522         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
 523                                             time.gmtime(time.time())))
 524         msg.append('Content-type: %s' % type)
 525         if encoding == 'base64':
 526             import base64
 527             data = base64.decodestring(data)
 528         else:
 529             data = unquote(data)
 530         msg.append('Content-length: %d' % len(data))
 531         msg.append('')
 532         msg.append(data)
 533         msg = '\n'.join(msg)
 534         f = StringIO.StringIO(msg)
 535         headers = mimetools.Message(f, 0)
 536         f.fileno = None     # needed for addinfourl
 537         return addinfourl(f, headers, url)
 538
 539
 540 class FancyURLopener(URLopener):
 541     """Derived class with handlers for errors we can handle (perhaps)."""
 542
 543     def __init__(self, *args, **kwargs):
 544         apply(URLopener.__init__, (self,) + args, kwargs)
 545         self.auth_cache = {}
 546         self.tries = 0
 547         self.maxtries = 10
 548
 549     def http_error_default(self, url, fp, errcode, errmsg, headers):
 550         """Default error handling -- don't raise an exception."""
 551         return addinfourl(fp, headers, "http:" + url)
 552
 553     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
 554         """Error 302 -- relocated (temporarily)."""
 555         self.tries += 1
 556         if self.maxtries and self.tries >= self.maxtries:
 557             if hasattr(self, "http_error_500"):
 558                 meth = self.http_error_500
 559             else:
 560                 meth = self.http_error_default
 561             self.tries = 0
 562             return meth(url, fp, 500,
 563                         "Internal Server Error: Redirect Recursion", headers)
 564         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
 565                                         data)
 566         self.tries = 0
 567         return result
 568
 569     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
 570         if 'location' in headers:
 571             newurl = headers['location']
 572         elif 'uri' in headers:
 573             newurl = headers['uri']
 574         else:
 575             return
 576         void = fp.read()
 577         fp.close()
 578         # In case the server sent a relative URL, join with original:
 579         newurl = basejoin(self.type + ":" + url, newurl)
 580         if data is None:
 581             return self.open(newurl)
 582         else:
 583             return self.open(newurl, data)
 584
 585     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
 586         """Error 301 -- also relocated (permanently)."""
 587         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 588
 589     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
 590         """Error 401 -- authentication required.
 591         See this URL for a description of the basic authentication scheme:
 592         http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
 593         if not 'www-authenticate' in headers:
 594             URLopener.http_error_default(self, url, fp,
 595                                          errcode, errmsg, headers)
 596         stuff = headers['www-authenticate']
 597         import re
 598         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 599         if not match:
 600             URLopener.http_error_default(self, url, fp,
 601                                          errcode, errmsg, headers)
 602         scheme, realm = match.groups()
 603         if scheme.lower() != 'basic':
 604             URLopener.http_error_default(self, url, fp,
 605                                          errcode, errmsg, headers)
 606         name = 'retry_' + self.type + '_basic_auth'
 607         if data is None:
 608             return getattr(self,name)(url, realm)
 609         else:
 610             return getattr(self,name)(url, realm, data)
 611
 612     def retry_http_basic_auth(self, url, realm, data=None):
 613         host, selector = splithost(url)
 614         i = host.find('@') + 1
 615         host = host[i:]
 616         user, passwd = self.get_user_passwd(host, realm, i)
 617         if not (user or passwd): return None
 618         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 619         newurl = 'http://' + host + selector
 620         if data is None:
 621             return self.open(newurl)
 622         else:
 623             return self.open(newurl, data)
 624
 625     def retry_https_basic_auth(self, url, realm, data=None):
 626         host, selector = splithost(url)
 627         i = host.find('@') + 1
 628         host = host[i:]
 629         user, passwd = self.get_user_passwd(host, realm, i)
 630         if not (user or passwd): return None
 631         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 632         newurl = '//' + host + selector
 633         return self.open_https(newurl, data)
 634
 635     def get_user_passwd(self, host, realm, clear_cache = 0):
 636         key = realm + '@' + host.lower()
 637         if key in self.auth_cache:
 638             if clear_cache:
 639                 del self.auth_cache[key]
 640             else:
 641                 return self.auth_cache[key]
 642         user, passwd = self.prompt_user_passwd(host, realm)
 643         if user or passwd: self.auth_cache[key] = (user, passwd)
 644         return user, passwd
 645
 646     def prompt_user_passwd(self, host, realm):
 647         """Override this in a GUI environment!"""
 648         import getpass
 649         try:
 650             user = raw_input("Enter username for %s at %s: " % (realm,
 651                                                                 host))
 652             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
 653                 (user, realm, host))
 654             return user, passwd
 655         except KeyboardInterrupt:
 656             print
 657             return None, None
 658
 659
 660 # Utility functions
 661
 662 _localhost = None
 663 def localhost():
 664     """Return the IP address of the magic hostname 'localhost'."""
 665     global _localhost
 666     if _localhost is None:
 667         _localhost = socket.gethostbyname('localhost')
 668     return _localhost
 669
 670 _thishost = None
 671 def thishost():
 672     """Return the IP address of the current host."""
 673     global _thishost
 674     if _thishost is None:
 675         _thishost = socket.gethostbyname(socket.gethostname())
 676     return _thishost
 677
 678 _ftperrors = None
 679 def ftperrors():
 680     """Return the set of errors raised by the FTP class."""
 681     global _ftperrors
 682     if _ftperrors is None:
 683         import ftplib
 684         _ftperrors = ftplib.all_errors
 685     return _ftperrors
 686
 687 _noheaders = None
 688 def noheaders():
 689     """Return an empty mimetools.Message object."""
 690     global _noheaders
 691     if _noheaders is None:
 692         import mimetools
 693         import StringIO
 694         _noheaders = mimetools.Message(StringIO.StringIO(), 0)
 695         _noheaders.fp.close()   # Recycle file descriptor
 696     return _noheaders
 697
 698
 699 # Utility classes
 700
 701 class ftpwrapper:
 702     """Class used by open_ftp() for cache of open FTP connections."""
 703
 704     def __init__(self, user, passwd, host, port, dirs):
 705         self.user = user
 706         self.passwd = passwd
 707         self.host = host
 708         self.port = port
 709         self.dirs = dirs
 710         self.init()
 711
 712     def init(self):
 713         import ftplib
 714         self.busy = 0
 715         self.ftp = ftplib.FTP()
 716         self.ftp.connect(self.host, self.port)
 717         self.ftp.login(self.user, self.passwd)
 718         for dir in self.dirs:
 719             self.ftp.cwd(dir)
 720
    def retrfile(self, file, type):
        """Start retrieving `file` (or a directory listing) over FTP.

        `type` is the FTP transfer type letter sent in the TYPE command;
        'd' or 'D' requests a directory listing instead (sent as TYPE A).

        Returns a 2-tuple: a file object for the data connection, wrapped
        in addclosehook so that closing it calls self.endtransfer(), and
        the second element of ftplib's ntransfercmd() result (the
        retrieval length; presumably None when the server does not
        report one -- see ftplib).

        Raises IOError('ftp error', reason) when the existence check
        fails with a permanent error, or when RETR fails with a permanent
        error other than 550.  A 550 reply to RETR falls through to a
        LIST of the same name.
        """
        import ftplib
        # Finish any transfer still pending on this control connection.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The connection is unusable (any ftplib error): reconnect
            # once and retry; a second failure propagates to the caller.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Use nlst to see if the file exists at all
            try:
                self.ftp.nlst(file)
            except ftplib.error_perm, reason:
                # Re-raise as IOError, keeping the original traceback.
                raise IOError, ('ftp error', reason), sys.exc_info()[2]
            # Restore the transfer mode!
            self.ftp.voidcmd(cmd)
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A reply starting with 550 is tolerated: conn stays None
                # and the name is listed as a directory below.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        # Mark a transfer as in flight so endtransfer() reads the final
        # server response before the next command.
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
 758     def endtransfer(self):
 759         if not self.busy:
 760             return
 761         self.busy = 0
 762         try:
 763             self.ftp.voidresp()
 764         except ftperrors():
 765             pass
 766
    def close(self):
        """Finish any pending transfer, then close the FTP connection.

        Errors listed by ftperrors() that occur while closing are ignored.
        """
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            # Best effort: the connection may already be gone.
            pass
 773
 774 class addbase:
 775     """Base class for addinfo and addclosehook."""
 776
 777     def __init__(self, fp):
 778         self.fp = fp
 779         self.read = self.fp.read
 780         self.readline = self.fp.readline
 781         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
 782         if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno
 783
 784     def __repr__(self):
 785         return '<%s at %s whose fp = %s>' % (self.__class__.__name__,
 786                                              `id(self)`, `self.fp`)
 787
 788     def close(self):
 789         self.read = None
 790         self.readline = None
 791         self.readlines = None
 792         self.fileno = None
 793         if self.fp: self.fp.close()
 794         self.fp = None
 795
 796 class addclosehook(addbase):
 797     """Class to add a close hook to an open file."""
 798
 799     def __init__(self, fp, closehook, *hookargs):
 800         addbase.__init__(self, fp)
 801         self.closehook = closehook
 802         self.hookargs = hookargs
 803
 804     def close(self):
 805         addbase.close(self)
 806         if self.closehook:
 807             apply(self.closehook, self.hookargs)
 808             self.closehook = None
 809             self.hookargs = None
 810
class addinfo(addbase):
    """Class to add an info() method to an open file.

    `headers` is stored as given and handed back unchanged by info().
    """

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers
 820
class addinfourl(addbase):
    """Class to add info() and geturl() methods to an open file.

    `headers` and `url` are stored as given and handed back unchanged
    by info() and geturl() respectively.
    """

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object passed to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL passed to the constructor."""
        return self.url
 834
 835
def basejoin(base, url):
    """Utility to combine a URL with a base URL to form a new URL.

    `url` is returned unchanged when it already carries a type (scheme),
    or when it names a host but `base` has no type to inherit.
    Otherwise the type, and if needed the host, are taken from `base`,
    and a non-absolute path in `url` is resolved against the directory
    part of the base path.  Leading '../' components are applied one at
    a time.  The tag and query of `base` are discarded.
    """
    type, path = splittype(url)
    if type:
        # if url is complete (i.e., it contains a type), return it
        return url
    host, path = splithost(path)
    type, basepath = splittype(base) # inherit type from base
    if host:
        # if url contains host, just inherit type
        if type: return type + '://' + host + path
        else:
            # no type inherited, so url must have started with //
            # just return it
            return url
    host, basepath = splithost(basepath) # inherit host
    basepath, basetag = splittag(basepath) # remove extraneous cruft
    basepath, basequery = splitquery(basepath) # idem
    if path[:1] != '/':
        # non-absolute path name
        if path[:1] in ('#', '?'):
            # path is just a tag or query, attach to basepath
            i = len(basepath)
        else:
            # else replace last component
            i = basepath.rfind('/')
        if i < 0:
            # basepath not absolute
            if host:
                # host present, make absolute
                basepath = '/'
            else:
                # else keep non-absolute
                basepath = ''
        else:
            # remove last file component
            basepath = basepath[:i+1]
        # Interpret ../ (important because of symlinks)
        while basepath and path[:3] == '../':
            path = path[3:]
            # Look for the slash that ends the parent directory,
            # ignoring basepath's own trailing slash.
            i = basepath[:-1].rfind('/')
            if i > 0:
                basepath = basepath[:i+1]
            elif i == 0:
                # Reached the root; further '../' are left in path.
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    # A URL with a host needs an absolute path.
    if host and path and path[0] != '/':
        path = '/' + path
    if type and host: return type + '://' + host + path
    elif type: return type + ':' + path
    elif host: return '//' + host + path # don't know what this means
    else: return path
 892
 893
 894 # Utilities to parse URLs (most of these return None for missing parts):
 895 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 896 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 897 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 898 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
 899 # splitpasswd('user:passwd') -> 'user', 'passwd'
 900 # splitport('host:port') --> 'host', 'port'
 901 # splitquery('/path?query') --> '/path', 'query'
 902 # splittag('/path#tag') --> '/path', 'tag'
 903 # splitattr('/path;attr1=value1;attr2=value2;...') ->
 904 #   '/path', ['attr1=value1', 'attr2=value2', ...]
 905 # splitvalue('attr=value') --> 'attr', 'value'
 906 # splitgophertype('/Xselector') --> 'X', 'selector'
 907 # unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
 909
 910 try:
 911     unicode
 912 except NameError:
 913     def _is_unicode(x):
 914         return 0
 915 else:
 916     def _is_unicode(x):
 917         return isinstance(x, unicode)
 918
 919 def toBytes(url):
 920     """toBytes(u"URL") --> 'URL'."""
 921     # Most URL schemes require ASCII. If that changes, the conversion
 922     # can be relaxed
 923     if _is_unicode(url):
 924         try:
 925             url = url.encode("ASCII")
 926         except UnicodeError:
 927             raise UnicodeError("URL " + repr(url) +
 928                                " contains non-ASCII characters")
 929     return url
 930
 931 def unwrap(url):
 932     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
 933     url = url.strip()
 934     if url[:1] == '<' and url[-1:] == '>':
 935         url = url[1:-1].strip()
 936     if url[:4] == 'URL:': url = url[4:].strip()
 937     return url
 938
 939 _typeprog = None
 940 def splittype(url):
 941     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
 942     global _typeprog
 943     if _typeprog is None:
 944         import re
 945         _typeprog = re.compile('^([^/:]+):')
 946
 947     match = _typeprog.match(url)
 948     if match:
 949         scheme = match.group(1)
 950         return scheme.lower(), url[len(scheme) + 1:]
 951     return None, url
 952
 953 _hostprog = None
 954 def splithost(url):
 955     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
 956     global _hostprog
 957     if _hostprog is None:
 958         import re
 959         _hostprog = re.compile('^//([^/]*)(.*)$')
 960
 961     match = _hostprog.match(url)
 962     if match: return match.group(1, 2)
 963     return None, url
 964
 965 _userprog = None
 966 def splituser(host):
 967     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
 968     global _userprog
 969     if _userprog is None:
 970         import re
 971         _userprog = re.compile('^(.*)@(.*)$')
 972
 973     match = _userprog.match(host)
 974     if match: return map(unquote, match.group(1, 2))
 975     return None, host
 976
 977 _passwdprog = None
 978 def splitpasswd(user):
 979     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
 980     global _passwdprog
 981     if _passwdprog is None:
 982         import re
 983         _passwdprog = re.compile('^([^:]*):(.*)$')
 984
 985     match = _passwdprog.match(user)
 986     if match: return match.group(1, 2)
 987     return user, None
 988
# splitport('host:port') --> 'host', 'port'
 990 _portprog = None
 991 def splitport(host):
 992     """splitport('host:port') --> 'host', 'port'."""
 993     global _portprog
 994     if _portprog is None:
 995         import re
 996         _portprog = re.compile('^(.*):([0-9]+)$')
 997
 998     match = _portprog.match(host)
 999     if match: return match.group(1, 2)
1000     return host, None
1001
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return the given default port if no ':' is found; defaults to -1.
    Return the numerical port if a valid number is found after the
    last ':'.
    Return None as the port if there is a ':' but what follows it is
    empty or not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile lazily so importing this module does not pull in re.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if not match:
        return host, defport
    host, port = match.group(1, 2)
    # An empty port is reported as None, exactly like a malformed one;
    # test for it directly instead of raising ValueError to get there.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
1023
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Returns (url, None) when url
    contains no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily so importing this module does not pull in re.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.groups()
1035
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Returns (url, None) when url
    contains no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily so importing this module does not pull in re.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
1047
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'."""
    pieces = url.split(';')
    # The first piece is the path; whatever is left are the attributes.
    path = pieces.pop(0)
    return path, pieces
1053
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Returns (attr, None) when
    attr contains no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily so importing this module does not pull in re.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
1065
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector starts with '/' followed
    by at least one more character.
    """
    if len(selector) < 2 or not selector.startswith('/'):
        return None, selector
    gophertype = selector[1]
    return gophertype, selector[2:]
1071
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  Any other '%' is left in place,
    including one followed by a sign or a space (e.g. '%+1'), which
    int(x, 16) would otherwise accept as a number.
    """
    hexdigits = '0123456789abcdefABCDEF'
    pieces = s.split('%')
    # Text before the first '%' is copied through unchanged.
    res = [pieces[0]]
    myappend = res.append
    for item in pieces[1:]:
        code = item[:2]
        if len(code) == 2 and code[0] in hexdigits and code[1] in hexdigits:
            myappend(chr(int(code, 16)) + item[2:])
        else:
            # Not a valid escape: put back the '%' that split() removed.
            myappend('%' + item)
    return "".join(res)
1090
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    # '+' must be replaced before unquoting so that an escaped plus
    # sign ('%2B') survives as a literal '+'.
    return unquote(s.replace('+', ' '))
1097
# Characters that quote() never escapes, whatever `safe` is.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

# The safe set produced by quote()'s default safe='/'.
_fast_safe_test = always_safe + '/'
# Lookup table for _fast_quote(), built on first use.
_fast_safe = None

def _fast_quote(s):
    """Quote s, leaving only the characters in _fast_safe_test unescaped.

    Uses a dictionary for the membership test instead of searching the
    safe string for every character.
    """
    global _fast_safe
    if _fast_safe is None:
        _fast_safe = dict([(ch, ch) for ch in _fast_safe_test])
    table = _fast_safe
    out = []
    for ch in s:
        if ch in table:
            out.append(ch)
        else:
            out.append('%%%02X' % ord(ch))
    return ''.join(out)
1117
def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Replace every character of s that is neither in always_safe nor in
    `safe` by a '%XX' escape.

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    allowed = always_safe + safe
    if allowed == _fast_safe_test:
        # Default safe set: use the dictionary-based fast path.
        return _fast_quote(s)
    pieces = []
    for ch in s:
        if ch in allowed:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)
1148
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Everything else is quoted as by quote(s, safe); by default '/' is
    escaped too, because `safe` is empty.
    """
    if ' ' not in s:
        return quote(s, safe)
    # Quote the words separately so the '+' separators are not escaped.
    return '+'.join([quote(word, safe) for word in s.split(' ')])
1158
def urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Keys and values are converted with str() and quoted with
    quote_plus(); the resulting 'key=value' pairs are joined with '&'.
    Raises TypeError when query is neither a mapping nor a sequence
    whose first element is a tuple (a non-empty string, for example).
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Both the explicit raise above and a TypeError from len() or
            # indexing end up here; re-raise with a uniform message while
            # keeping the original traceback.
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                # Plain strings are sequences too, but must be encoded
                # as a single value.
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
1221
1222 # Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    letter case); this seems to be the standard convention.  Variables
    with an empty value are ignored.  If you need a different way, you
    can pass a proxies dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    found = {}
    for raw_name, value in os.environ.items():
        if not value:
            continue
        name = raw_name.lower()
        if name.endswith(suffix):
            # The scheme is the variable name without the suffix.
            found[name[:-len(suffix)]] = value
    return found
1238
1239 if os.name == 'mac':
1240     def getproxies():
1241         """Return a dictionary of scheme -> proxy server URL mappings.
1242
1243         By convention the mac uses Internet Config to store
1244         proxies.  An HTTP proxy, for instance, is stored under
1245         the HttpProxy key.
1246
1247         """
1248         try:
1249             import ic
1250         except ImportError:
1251             return {}
1252
1253         try:
1254             config = ic.IC()
1255         except ic.error:
1256             return {}
1257         proxies = {}
1258         # HTTP:
1259         if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1260             try:
1261                 value = config['HTTPProxyHost']
1262             except ic.error:
1263                 pass
1264             else:
1265                 proxies['http'] = 'http://%s' % value
1266         # FTP: XXXX To be done.
1267         # Gopher: XXXX To be done.
1268         return proxies
1269
    def proxy_bypass(x):
        """Return 0: this platform branch never bypasses the proxy."""
        return 0
1272
1273 elif os.name == 'nt':
1274     def getproxies_registry():
1275         """Return a dictionary of scheme -> proxy server URL mappings.
1276
1277         Win32 uses the registry to store proxies.
1278
1279         """
1280         proxies = {}
1281         try:
1282             import _winreg
1283         except ImportError:
1284             # Std module, so should be around - but you never know!
1285             return proxies
1286         try:
1287             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1288                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1289             proxyEnable = _winreg.QueryValueEx(internetSettings,
1290                                                'ProxyEnable')[0]
1291             if proxyEnable:
1292                 # Returned as Unicode but problems if not converted to ASCII
1293                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1294                                                        'ProxyServer')[0])
1295                 if '=' in proxyServer:
1296                     # Per-protocol settings
1297                     for p in proxyServer.split(';'):
1298                         protocol, address = p.split('=', 1)
1299                         # See if address has a type:// prefix
1300                         import re
1301                         if not re.match('^([^/:]+)://', address):
1302                             address = '%s://%s' % (protocol, address)
1303                         proxies[protocol] = address
1304                 else:
1305                     # Use one setting for all protocols
1306                     if proxyServer[:5] == 'http:':
1307                         proxies['http'] = proxyServer
1308                     else:
1309                         proxies['http'] = 'http://%s' % proxyServer
1310                         proxies['ftp'] = 'ftp://%s' % proxyServer
1311             internetSettings.Close()
1312         except (WindowsError, ValueError, TypeError):
1313             # Either registry key not found etc, or the value in an
1314             # unexpected format.
1315             # proxies already set up to be empty so nothing to do
1316             pass
1317         return proxies
1318
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.  The registry is consulted only when the
        environment yields no proxies at all; the two sources are never
        merged.

        """
        return getproxies_environment() or getproxies_registry()
1327
1328     def proxy_bypass(host):
1329         try:
1330             import _winreg
1331             import re
1332         except ImportError:
1333             # Std modules, so should be around - but you never know!
1334             return 0
1335         try:
1336             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1337                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1338             proxyEnable = _winreg.QueryValueEx(internetSettings,
1339                                                'ProxyEnable')[0]
1340             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1341                                                      'ProxyOverride')[0])
1342             # ^^^^ Returned as Unicode but problems if not converted to ASCII
1343         except WindowsError:
1344             return 0
1345         if not proxyEnable or not proxyOverride:
1346             return 0
1347         # try to make a host list from name and IP address.
1348         host = [host]
1349         try:
1350             addr = socket.gethostbyname(host[0])
1351             if addr != host:
1352                 host.append(addr)
1353         except socket.error:
1354             pass
1355         # make a check value list from the registry entry: replace the
1356         # '<local>' string by the localhost entry and the corresponding
1357         # canonical entry.
1358         proxyOverride = proxyOverride.split(';')
1359         i = 0
1360         while i < len(proxyOverride):
1361             if proxyOverride[i] == '<local>':
1362                 proxyOverride[i:i+1] = ['localhost',
1363                                         '127.0.0.1',
1364                                         socket.gethostname(),
1365                                         socket.gethostbyname(
1366                                             socket.gethostname())]
1367             i += 1
1368         # print proxyOverride
1369         # now check if we match one of the registry values.
1370         for test in proxyOverride:
1371             test = test.replace(".", r"\.")     # mask dots
1372             test = test.replace("*", r".*")     # change glob sequence
1373             test = test.replace("?", r".")      # change glob char
1374             for val in host:
1375                 # print "%s <--> %s" %( test, val )
1376                 if re.match(test, val, re.I):
1377                     return 1
1378         return 0
1379
1380 else:
    # By default use environment variables: on every other platform
    # getproxies is simply another name for getproxies_environment.
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Return 0: this platform branch never bypasses the proxy."""
        return 0
1386
1387 # Test and time quote() and unquote()
1388 def test1():
1389     s = ''
1390     for i in range(256): s = s + chr(i)
1391     s = s*4
1392     t0 = time.time()
1393     qs = quote(s)
1394     uqs = unquote(qs)
1395     t1 = time.time()
1396     if uqs != s:
1397         print 'Wrong!'
1398     print `s`
1399     print `qs`
1400     print `uqs`
1401     print round(t1 - t0, 3), 'sec'
1402
1403
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a remote transfer.

    Passed to urlretrieve() by test() as its reporting callback.
    """
    message = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print (message)
1408
1409 # Test program
1410 def test(args=[]):
1411     if not args:
1412         args = [
1413             '/etc/passwd',
1414             'file:/etc/passwd',
1415             'file://localhost/etc/passwd',
1416             'ftp://ftp.python.org/pub/python/README',
1417 ##          'gopher://gopher.micro.umn.edu/1/',
1418             'http://www.python.org/index.html',
1419             ]
1420         if hasattr(URLopener, "open_https"):
1421             args.append('https://synergy.as.cmu.edu/~geek/')
1422     try:
1423         for url in args:
1424             print '-'*10, url, '-'*10
1425             fn, h = urlretrieve(url, None, reporthook)
1426             print fn
1427             if h:
1428                 print '======'
1429                 for k in h.keys(): print k + ':', h[k]
1430                 print '======'
1431             fp = open(fn, 'rb')
1432             data = fp.read()
1433             del fp
1434             if '\r' in data:
1435                 table = string.maketrans("", "")
1436                 data = data.translate(table, "\r")
1437             print data
1438             fn, h = None, None
1439         print '-'*40
1440     finally:
1441         urlcleanup()
1442
1443 def main():
1444     import getopt, sys
1445     try:
1446         opts, args = getopt.getopt(sys.argv[1:], "th")
1447     except getopt.error, msg:
1448         print msg
1449         print "Use -h for help"
1450         return
1451     t = 0
1452     for o, a in opts:
1453         if o == '-t':
1454             t = t + 1
1455         if o == '-h':
1456             print "Usage: python urllib.py [-t] [url ...]"
1457             print "-t runs self-test;",
1458             print "otherwise, contents of urls are printed"
1459             return
1460     if t:
1461         if t > 1:
1462             test1()
1463         test(args)
1464     else:
1465         if not args:
1466             print "Use -h for help"
1467         for url in args:
1468             print urlopen(url).read(),
1469
# Run the command-line driver (main) when this file is executed as a
# script rather than imported as a module.
if __name__ == '__main__':
    main()