# Lib/urllib.py
1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that it has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
23 """
25 import string
26 import socket
27 import os
28 import time
29 import sys
31 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
32 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
33 "urlencode", "url2pathname", "pathname2url", "splittag",
34 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
35 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
36 "splitnport", "splitquery", "splitattr", "splitvalue",
37 "splitgophertype", "getproxies"]
39 __version__ = '1.15' # XXX This version is not always updated :-(
41 MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
43 # Helper for non-unix systems
44 if os.name == 'mac':
45 from macurl2path import url2pathname, pathname2url
46 elif os.name == 'nt':
47 from nturl2path import url2pathname, pathname2url
48 elif os.name == 'riscos':
49 from rourl2path import url2pathname, pathname2url
50 else:
51 def url2pathname(pathname):
52 return unquote(pathname)
53 def pathname2url(pathname):
54 return quote(pathname)
56 # This really consists of two pieces:
57 # (1) a class which handles opening of all sorts of URLs
58 # (plus assorted utilities etc.)
59 # (2) a set of functions for parsing URLs
60 # XXX Should these be separated out into different modules?
63 # Shortcut for basic usage
64 _urlopener = None
65 def urlopen(url, data=None, proxies=None):
66 """urlopen(url [, data]) -> open file-like object"""
67 global _urlopener
68 if proxies is not None:
69 opener = FancyURLopener(proxies=proxies)
70 elif not _urlopener:
71 opener = FancyURLopener()
72 _urlopener = opener
73 else:
74 opener = _urlopener
75 if data is None:
76 return opener.open(url)
77 else:
78 return opener.open(url, data)
79 def urlretrieve(url, filename=None, reporthook=None, data=None):
80 global _urlopener
81 if not _urlopener:
82 _urlopener = FancyURLopener()
83 return _urlopener.retrieve(url, filename, reporthook, data)
84 def urlcleanup():
85 if _urlopener:
86 _urlopener.cleanup()
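# Usage sketch for the shortcut functions above (the URL is hypothetical):
#
#     f = urlopen('http://www.example.com/')
#     print f.info().getheader('Content-Type')
#     data = f.read()
#     f.close()
#
#     filename, headers = urlretrieve('http://www.example.com/robots.txt')
#     urlcleanup()    # remove temporary files created by urlretrieve()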
89 ftpcache = {}
90 class URLopener:
91 """Class to open URLs.
92 This is a class rather than just a subroutine because we may need
93 more than one set of global protocol-specific options.
94 Note -- this is a base class for those who don't want the
95 automatic handling of errors type 302 (relocated) and 401
96 (authorization needed)."""
98 __tempfiles = None
100 version = "Python-urllib/%s" % __version__
102 # Constructor
103 def __init__(self, proxies=None, **x509):
104 if proxies is None:
105 proxies = getproxies()
106 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
107 self.proxies = proxies
108 self.key_file = x509.get('key_file')
109 self.cert_file = x509.get('cert_file')
110 self.addheaders = [('User-agent', self.version)]
111 self.__tempfiles = []
112 self.__unlink = os.unlink # See cleanup()
113 self.tempcache = None
114 # Undocumented feature: if you assign {} to tempcache,
115 # it is used to cache files retrieved with
116 # self.retrieve(). This is not enabled by default
117 # since it does not work for changing documents (and I
118 # haven't got the logic to check expiration headers
119 # yet).
120 self.ftpcache = ftpcache
121 # Undocumented feature: you can use a different
122 # ftp cache by assigning to the .ftpcache member;
123 # in case you want logically independent URL openers
124 # XXX This is not threadsafe. Bah.
126 def __del__(self):
127 self.close()
129 def close(self):
130 self.cleanup()
132 def cleanup(self):
133 # This code sometimes runs when the rest of this module
134 # has already been deleted, so it can't use any globals
135 # or import anything.
136 if self.__tempfiles:
137 for file in self.__tempfiles:
138 try:
139 self.__unlink(file)
140 except OSError:
141 pass
142 del self.__tempfiles[:]
143 if self.tempcache:
144 self.tempcache.clear()
146 def addheader(self, *args):
147 """Add a header to be used by the HTTP interface only
148 e.g. u.addheader('Accept', 'sound/basic')"""
149 self.addheaders.append(args)
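# Usage sketch: an opener with an explicit proxy mapping and an extra request
# header (the proxy and target hosts are hypothetical):
#
#     opener = URLopener(proxies={'http': 'http://proxy.example.com:3128/'})
#     opener.addheader('Accept', 'text/html')
#     f = opener.open('http://www.example.com/')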
151 # External interface
152 def open(self, fullurl, data=None):
153 """Use URLopener().open(file) instead of open(file, 'r')."""
154 fullurl = unwrap(toBytes(fullurl))
155 if self.tempcache and fullurl in self.tempcache:
156 filename, headers = self.tempcache[fullurl]
157 fp = open(filename, 'rb')
158 return addinfourl(fp, headers, fullurl)
159 urltype, url = splittype(fullurl)
160 if not urltype:
161 urltype = 'file'
162 if urltype in self.proxies:
163 proxy = self.proxies[urltype]
164 urltype, proxyhost = splittype(proxy)
165 host, selector = splithost(proxyhost)
166 url = (host, fullurl) # Signal special case to open_*()
167 else:
168 proxy = None
169 name = 'open_' + urltype
170 self.type = urltype
171 if '-' in name:
172 # replace - with _
173 name = '_'.join(name.split('-'))
174 if not hasattr(self, name):
175 if proxy:
176 return self.open_unknown_proxy(proxy, fullurl, data)
177 else:
178 return self.open_unknown(fullurl, data)
179 try:
180 if data is None:
181 return getattr(self, name)(url)
182 else:
183 return getattr(self, name)(url, data)
184 except socket.error, msg:
185 raise IOError, ('socket error', msg), sys.exc_info()[2]
187 def open_unknown(self, fullurl, data=None):
188 """Overridable interface to open unknown URL type."""
189 type, url = splittype(fullurl)
190 raise IOError, ('url error', 'unknown url type', type)
192 def open_unknown_proxy(self, proxy, fullurl, data=None):
193 """Overridable interface to open unknown URL type."""
194 type, url = splittype(fullurl)
195 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
197 # External interface
198 def retrieve(self, url, filename=None, reporthook=None, data=None):
199 """retrieve(url) returns (filename, headers) for a local object
200 or (tempfilename, headers) for a remote object."""
201 url = unwrap(toBytes(url))
202 if self.tempcache and url in self.tempcache:
203 return self.tempcache[url]
204 type, url1 = splittype(url)
205 if filename is None and (not type or type == 'file'):
206 try:
207 fp = self.open_local_file(url1)
208 hdrs = fp.info()
209 del fp
210 return url2pathname(splithost(url1)[1]), hdrs
211 except IOError, msg:
212 pass
213 fp = self.open(url, data)
214 headers = fp.info()
215 if not filename:
216 import tempfile
217 garbage, path = splittype(url)
218 garbage, path = splithost(path or "")
219 path, garbage = splitquery(path or "")
220 path, garbage = splitattr(path or "")
221 suffix = os.path.splitext(path)[1]
222 filename = tempfile.mktemp(suffix)
223 self.__tempfiles.append(filename)
224 result = filename, headers
225 if self.tempcache is not None:
226 self.tempcache[url] = result
227 tfp = open(filename, 'wb')
228 bs = 1024*8
229 size = -1
230 blocknum = 1
231 if reporthook:
232 if "content-length" in headers:
233 size = int(headers["Content-Length"])
234 reporthook(0, bs, size)
235 block = fp.read(bs)
236 if reporthook:
237 reporthook(1, bs, size)
238 while block:
239 tfp.write(block)
240 block = fp.read(bs)
241 blocknum = blocknum + 1
242 if reporthook:
243 reporthook(blocknum, bs, size)
244 fp.close()
245 tfp.close()
246 del fp
247 del tfp
248 return result
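# Usage sketch: retrieve() with a progress callback.  The callback receives
# (blocknum, blocksize, totalsize); totalsize is -1 if the server sends no
# Content-Length.  The URL and filename are hypothetical.
#
#     def hook(blocknum, blocksize, totalsize):
#         print "block %d of %d bytes, total %d" % (blocknum, blocksize, totalsize)
#
#     fn, headers = URLopener().retrieve('http://www.example.com/big.tar.gz',
#                                        'big.tar.gz', hook)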
250 # Each method named open_<type> knows how to open that type of URL
252 def open_http(self, url, data=None):
253 """Use HTTP protocol."""
254 import httplib
255 user_passwd = None
256 if isinstance(url, str):
257 host, selector = splithost(url)
258 if host:
259 user_passwd, host = splituser(host)
260 host = unquote(host)
261 realhost = host
262 else:
263 host, selector = url
264 urltype, rest = splittype(selector)
265 url = rest
266 user_passwd = None
267 if urltype.lower() != 'http':
268 realhost = None
269 else:
270 realhost, rest = splithost(rest)
271 if realhost:
272 user_passwd, realhost = splituser(realhost)
273 if user_passwd:
274 selector = "%s://%s%s" % (urltype, realhost, rest)
275 if proxy_bypass(realhost):
276 host = realhost
278 #print "proxy via http:", host, selector
279 if not host: raise IOError, ('http error', 'no host given')
280 if user_passwd:
281 import base64
282 auth = base64.encodestring(user_passwd).strip()
283 else:
284 auth = None
285 h = httplib.HTTP(host)
286 if data is not None:
287 h.putrequest('POST', selector)
288 h.putheader('Content-type', 'application/x-www-form-urlencoded')
289 h.putheader('Content-length', '%d' % len(data))
290 else:
291 h.putrequest('GET', selector)
292 if auth: h.putheader('Authorization', 'Basic %s' % auth)
293 if realhost: h.putheader('Host', realhost)
294 for args in self.addheaders: apply(h.putheader, args)
295 h.endheaders()
296 if data is not None:
297 h.send(data)
298 errcode, errmsg, headers = h.getreply()
299 fp = h.getfile()
300 if errcode == 200:
301 return addinfourl(fp, headers, "http:" + url)
302 else:
303 if data is None:
304 return self.http_error(url, fp, errcode, errmsg, headers)
305 else:
306 return self.http_error(url, fp, errcode, errmsg, headers, data)
308 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
309 """Handle http errors.
310 Derived class can override this, or provide specific handlers
311 named http_error_DDD where DDD is the 3-digit error code."""
312 # First check if there's a specific handler for this error
313 name = 'http_error_%d' % errcode
314 if hasattr(self, name):
315 method = getattr(self, name)
316 if data is None:
317 result = method(url, fp, errcode, errmsg, headers)
318 else:
319 result = method(url, fp, errcode, errmsg, headers, data)
320 if result: return result
321 return self.http_error_default(url, fp, errcode, errmsg, headers)
323 def http_error_default(self, url, fp, errcode, errmsg, headers):
324 """Default error handler: close the connection and raise IOError."""
325 void = fp.read()
326 fp.close()
327 raise IOError, ('http error', errcode, errmsg, headers)
329 if hasattr(socket, "ssl"):
330 def open_https(self, url, data=None):
331 """Use HTTPS protocol."""
332 import httplib
333 user_passwd = None
334 if isinstance(url, str):
335 host, selector = splithost(url)
336 if host:
337 user_passwd, host = splituser(host)
338 host = unquote(host)
339 realhost = host
340 else:
341 host, selector = url
342 urltype, rest = splittype(selector)
343 url = rest
344 user_passwd = None
345 if urltype.lower() != 'https':
346 realhost = None
347 else:
348 realhost, rest = splithost(rest)
349 if realhost:
350 user_passwd, realhost = splituser(realhost)
351 if user_passwd:
352 selector = "%s://%s%s" % (urltype, realhost, rest)
353 #print "proxy via https:", host, selector
354 if not host: raise IOError, ('https error', 'no host given')
355 if user_passwd:
356 import base64
357 auth = base64.encodestring(user_passwd).strip()
358 else:
359 auth = None
360 h = httplib.HTTPS(host, 0,
361 key_file=self.key_file,
362 cert_file=self.cert_file)
363 if data is not None:
364 h.putrequest('POST', selector)
365 h.putheader('Content-type',
366 'application/x-www-form-urlencoded')
367 h.putheader('Content-length', '%d' % len(data))
368 else:
369 h.putrequest('GET', selector)
370 if auth: h.putheader('Authorization', 'Basic %s' % auth)
371 if realhost: h.putheader('Host', realhost)
372 for args in self.addheaders: apply(h.putheader, args)
373 h.endheaders()
374 if data is not None:
375 h.send(data)
376 errcode, errmsg, headers = h.getreply()
377 fp = h.getfile()
378 if errcode == 200:
379 return addinfourl(fp, headers, "https:" + url)
380 else:
381 if data is None:
382 return self.http_error(url, fp, errcode, errmsg, headers)
383 else:
384 return self.http_error(url, fp, errcode, errmsg, headers,
385 data)
387 def open_gopher(self, url):
388 """Use Gopher protocol."""
389 import gopherlib
390 host, selector = splithost(url)
391 if not host: raise IOError, ('gopher error', 'no host given')
392 host = unquote(host)
393 type, selector = splitgophertype(selector)
394 selector, query = splitquery(selector)
395 selector = unquote(selector)
396 if query:
397 query = unquote(query)
398 fp = gopherlib.send_query(selector, query, host)
399 else:
400 fp = gopherlib.send_selector(selector, host)
401 return addinfourl(fp, noheaders(), "gopher:" + url)
403 def open_file(self, url):
404 """Use local file or FTP depending on form of URL."""
405 if url[:2] == '//' and url[2:3] != '/':
406 return self.open_ftp(url)
407 else:
408 return self.open_local_file(url)
410 def open_local_file(self, url):
411 """Use local file."""
412 import mimetypes, mimetools, rfc822, StringIO
413 host, file = splithost(url)
414 localname = url2pathname(file)
415 try:
416 stats = os.stat(localname)
417 except OSError, e:
418 raise IOError(e.errno, e.strerror, e.filename)
419 size = stats.st_size
420 modified = rfc822.formatdate(stats.st_mtime)
421 mtype = mimetypes.guess_type(url)[0]
422 headers = mimetools.Message(StringIO.StringIO(
423 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
424 (mtype or 'text/plain', size, modified)))
425 if not host:
426 urlfile = file
427 if file[:1] == '/':
428 urlfile = 'file://' + file
429 return addinfourl(open(localname, 'rb'),
430 headers, urlfile)
431 host, port = splitport(host)
432 if not port \
433 and socket.gethostbyname(host) in (localhost(), thishost()):
434 urlfile = file
435 if file[:1] == '/':
436 urlfile = 'file://' + file
437 return addinfourl(open(localname, 'rb'),
438 headers, urlfile)
439 raise IOError, ('local file error', 'not on local host')
441 def open_ftp(self, url):
442 """Use FTP protocol."""
443 import mimetypes, mimetools, StringIO
444 host, path = splithost(url)
445 if not host: raise IOError, ('ftp error', 'no host given')
446 host, port = splitport(host)
447 user, host = splituser(host)
448 if user: user, passwd = splitpasswd(user)
449 else: passwd = None
450 host = unquote(host)
451 user = unquote(user or '')
452 passwd = unquote(passwd or '')
453 host = socket.gethostbyname(host)
454 if not port:
455 import ftplib
456 port = ftplib.FTP_PORT
457 else:
458 port = int(port)
459 path, attrs = splitattr(path)
460 path = unquote(path)
461 dirs = path.split('/')
462 dirs, file = dirs[:-1], dirs[-1]
463 if dirs and not dirs[0]: dirs = dirs[1:]
464 if dirs and not dirs[0]: dirs[0] = '/'
465 key = user, host, port, '/'.join(dirs)
466 # XXX thread unsafe!
467 if len(self.ftpcache) > MAXFTPCACHE:
468 # Prune the cache, rather arbitrarily
469 for k in self.ftpcache.keys():
470 if k != key:
471 v = self.ftpcache[k]
472 del self.ftpcache[k]
473 v.close()
474 try:
475 if not key in self.ftpcache:
476 self.ftpcache[key] = \
477 ftpwrapper(user, passwd, host, port, dirs)
478 if not file: type = 'D'
479 else: type = 'I'
480 for attr in attrs:
481 attr, value = splitvalue(attr)
482 if attr.lower() == 'type' and \
483 value in ('a', 'A', 'i', 'I', 'd', 'D'):
484 type = value.upper()
485 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
486 mtype = mimetypes.guess_type("ftp:" + url)[0]
487 headers = ""
488 if mtype:
489 headers += "Content-Type: %s\n" % mtype
490 if retrlen is not None and retrlen >= 0:
491 headers += "Content-Length: %d\n" % retrlen
492 headers = mimetools.Message(StringIO.StringIO(headers))
493 return addinfourl(fp, headers, "ftp:" + url)
494 except ftperrors(), msg:
495 raise IOError, ('ftp error', msg), sys.exc_info()[2]
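# FTP URL forms handled by open_ftp() above (hosts and paths are hypothetical):
#
#     ftp://ftp.example.com/pub/README            - binary ('I') transfer
#     ftp://ftp.example.com/pub/notes.txt;type=a  - force ASCII ('A') transfer
#     ftp://ftp.example.com/pub/                  - directory listing ('D')
#     ftp://user:password@ftp.example.com/file    - log in as 'user'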
497 def open_data(self, url, data=None):
498 """Use "data" URL."""
499 # ignore POSTed data
501 # syntax of data URLs:
502 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
503 # mediatype := [ type "/" subtype ] *( ";" parameter )
504 # data := *urlchar
505 # parameter := attribute "=" value
506 import StringIO, mimetools
507 try:
508 [type, data] = url.split(',', 1)
509 except ValueError:
510 raise IOError, ('data error', 'bad data URL')
511 if not type:
512 type = 'text/plain;charset=US-ASCII'
513 semi = type.rfind(';')
514 if semi >= 0 and '=' not in type[semi:]:
515 encoding = type[semi+1:]
516 type = type[:semi]
517 else:
518 encoding = ''
519 msg = []
520 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
521 time.gmtime(time.time())))
522 msg.append('Content-type: %s' % type)
523 if encoding == 'base64':
524 import base64
525 data = base64.decodestring(data)
526 else:
527 data = unquote(data)
528 msg.append('Content-length: %d' % len(data))
529 msg.append('')
530 msg.append(data)
531 msg = '\n'.join(msg)
532 f = StringIO.StringIO(msg)
533 headers = mimetools.Message(f, 0)
534 f.fileno = None # needed for addinfourl
535 return addinfourl(f, headers, url)
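# Sample "data" URLs following the syntax sketched above (contents are
# illustrative only):
#
#     data:,A%20brief%20note              --> text/plain;charset=US-ASCII
#     data:text/plain;base64,SGVsbG8=     --> decodes to 'Hello'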
538 class FancyURLopener(URLopener):
539 """Derived class with handlers for errors we can handle (perhaps)."""
541 def __init__(self, *args, **kwargs):
542 apply(URLopener.__init__, (self,) + args, kwargs)
543 self.auth_cache = {}
544 self.tries = 0
545 self.maxtries = 10
547 def http_error_default(self, url, fp, errcode, errmsg, headers):
548 """Default error handling -- don't raise an exception."""
549 return addinfourl(fp, headers, "http:" + url)
551 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
552 """Error 302 -- relocated (temporarily)."""
553 self.tries += 1
554 if self.maxtries and self.tries >= self.maxtries:
555 if hasattr(self, "http_error_500"):
556 meth = self.http_error_500
557 else:
558 meth = self.http_error_default
559 self.tries = 0
560 return meth(url, fp, 500,
561 "Internal Server Error: Redirect Recursion", headers)
562 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
563 data)
564 self.tries = 0
565 return result
567 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
568 if 'location' in headers:
569 newurl = headers['location']
570 elif 'uri' in headers:
571 newurl = headers['uri']
572 else:
573 return
574 void = fp.read()
575 fp.close()
576 # In case the server sent a relative URL, join with original:
577 newurl = basejoin(self.type + ":" + url, newurl)
578 if data is None:
579 return self.open(newurl)
580 else:
581 return self.open(newurl, data)
583 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
584 """Error 301 -- also relocated (permanently)."""
585 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
587 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
588 """Error 401 -- authentication required.
589 See this URL for a description of the basic authentication scheme:
590 http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt"""
591 if not 'www-authenticate' in headers:
592 URLopener.http_error_default(self, url, fp,
593 errcode, errmsg, headers)
594 stuff = headers['www-authenticate']
595 import re
596 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
597 if not match:
598 URLopener.http_error_default(self, url, fp,
599 errcode, errmsg, headers)
600 scheme, realm = match.groups()
601 if scheme.lower() != 'basic':
602 URLopener.http_error_default(self, url, fp,
603 errcode, errmsg, headers)
604 name = 'retry_' + self.type + '_basic_auth'
605 if data is None:
606 return getattr(self,name)(url, realm)
607 else:
608 return getattr(self,name)(url, realm, data)
610 def retry_http_basic_auth(self, url, realm, data=None):
611 host, selector = splithost(url)
612 i = host.find('@') + 1
613 host = host[i:]
614 user, passwd = self.get_user_passwd(host, realm, i)
615 if not (user or passwd): return None
616 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
617 newurl = 'http://' + host + selector
618 if data is None:
619 return self.open(newurl)
620 else:
621 return self.open(newurl, data)
623 def retry_https_basic_auth(self, url, realm, data=None):
624 host, selector = splithost(url)
625 i = host.find('@') + 1
626 host = host[i:]
627 user, passwd = self.get_user_passwd(host, realm, i)
628 if not (user or passwd): return None
629 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
630 newurl = '//' + host + selector
631 return self.open_https(newurl, data)
633 def get_user_passwd(self, host, realm, clear_cache = 0):
634 key = realm + '@' + host.lower()
635 if key in self.auth_cache:
636 if clear_cache:
637 del self.auth_cache[key]
638 else:
639 return self.auth_cache[key]
640 user, passwd = self.prompt_user_passwd(host, realm)
641 if user or passwd: self.auth_cache[key] = (user, passwd)
642 return user, passwd
644 def prompt_user_passwd(self, host, realm):
645 """Override this in a GUI environment!"""
646 import getpass
647 try:
648 user = raw_input("Enter username for %s at %s: " % (realm,
649 host))
650 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
651 (user, realm, host))
652 return user, passwd
653 except KeyboardInterrupt:
654 print
655 return None, None
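# Usage sketch: overriding prompt_user_passwd() so Basic authentication works
# without a terminal (the credentials and URL are hypothetical):
#
#     class MyOpener(FancyURLopener):
#         def prompt_user_passwd(self, host, realm):
#             return 'user', 'secret'
#
#     f = MyOpener().open('http://www.example.com/private/')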
658 # Utility functions
660 _localhost = None
661 def localhost():
662 """Return the IP address of the magic hostname 'localhost'."""
663 global _localhost
664 if _localhost is None:
665 _localhost = socket.gethostbyname('localhost')
666 return _localhost
668 _thishost = None
669 def thishost():
670 """Return the IP address of the current host."""
671 global _thishost
672 if _thishost is None:
673 _thishost = socket.gethostbyname(socket.gethostname())
674 return _thishost
676 _ftperrors = None
677 def ftperrors():
678 """Return the set of errors raised by the FTP class."""
679 global _ftperrors
680 if _ftperrors is None:
681 import ftplib
682 _ftperrors = ftplib.all_errors
683 return _ftperrors
685 _noheaders = None
686 def noheaders():
687 """Return an empty mimetools.Message object."""
688 global _noheaders
689 if _noheaders is None:
690 import mimetools
691 import StringIO
692 _noheaders = mimetools.Message(StringIO.StringIO(), 0)
693 _noheaders.fp.close() # Recycle file descriptor
694 return _noheaders
697 # Utility classes
699 class ftpwrapper:
700 """Class used by open_ftp() for cache of open FTP connections."""
702 def __init__(self, user, passwd, host, port, dirs):
703 self.user = user
704 self.passwd = passwd
705 self.host = host
706 self.port = port
707 self.dirs = dirs
708 self.init()
710 def init(self):
711 import ftplib
712 self.busy = 0
713 self.ftp = ftplib.FTP()
714 self.ftp.connect(self.host, self.port)
715 self.ftp.login(self.user, self.passwd)
716 for dir in self.dirs:
717 self.ftp.cwd(dir)
719 def retrfile(self, file, type):
720 import ftplib
721 self.endtransfer()
722 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
723 else: cmd = 'TYPE ' + type; isdir = 0
724 try:
725 self.ftp.voidcmd(cmd)
726 except ftplib.all_errors:
727 self.init()
728 self.ftp.voidcmd(cmd)
729 conn = None
730 if file and not isdir:
731 # Use nlst to see if the file exists at all
732 try:
733 self.ftp.nlst(file)
734 except ftplib.error_perm, reason:
735 raise IOError, ('ftp error', reason), sys.exc_info()[2]
736 # Restore the transfer mode!
737 self.ftp.voidcmd(cmd)
738 # Try to retrieve as a file
739 try:
740 cmd = 'RETR ' + file
741 conn = self.ftp.ntransfercmd(cmd)
742 except ftplib.error_perm, reason:
743 if str(reason)[:3] != '550':
744 raise IOError, ('ftp error', reason), sys.exc_info()[2]
745 if not conn:
746 # Set transfer mode to ASCII!
747 self.ftp.voidcmd('TYPE A')
748 # Try a directory listing
749 if file: cmd = 'LIST ' + file
750 else: cmd = 'LIST'
751 conn = self.ftp.ntransfercmd(cmd)
752 self.busy = 1
753 # Pass back both a suitably decorated object and a retrieval length
754 return (addclosehook(conn[0].makefile('rb'),
755 self.endtransfer), conn[1])
756 def endtransfer(self):
757 if not self.busy:
758 return
759 self.busy = 0
760 try:
761 self.ftp.voidresp()
762 except ftperrors():
763 pass
765 def close(self):
766 self.endtransfer()
767 try:
768 self.ftp.close()
769 except ftperrors():
770 pass
772 class addbase:
773 """Base class for addinfo and addclosehook."""
775 def __init__(self, fp):
776 self.fp = fp
777 self.read = self.fp.read
778 self.readline = self.fp.readline
779 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
780 if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno
782 def __repr__(self):
783 return '<%s at %s whose fp = %s>' % (self.__class__.__name__,
784 `id(self)`, `self.fp`)
786 def close(self):
787 self.read = None
788 self.readline = None
789 self.readlines = None
790 self.fileno = None
791 if self.fp: self.fp.close()
792 self.fp = None
794 class addclosehook(addbase):
795 """Class to add a close hook to an open file."""
797 def __init__(self, fp, closehook, *hookargs):
798 addbase.__init__(self, fp)
799 self.closehook = closehook
800 self.hookargs = hookargs
802 def close(self):
803 addbase.close(self)
804 if self.closehook:
805 apply(self.closehook, self.hookargs)
806 self.closehook = None
807 self.hookargs = None
809 class addinfo(addbase):
810 """class to add an info() method to an open file."""
812 def __init__(self, fp, headers):
813 addbase.__init__(self, fp)
814 self.headers = headers
816 def info(self):
817 return self.headers
819 class addinfourl(addbase):
820 """class to add info() and geturl() methods to an open file."""
822 def __init__(self, fp, headers, url):
823 addbase.__init__(self, fp)
824 self.headers = headers
825 self.url = url
827 def info(self):
828 return self.headers
830 def geturl(self):
831 return self.url
834 def basejoin(base, url):
835 """Utility to combine a URL with a base URL to form a new URL."""
836 type, path = splittype(url)
837 if type:
838 # if url is complete (i.e., it contains a type), return it
839 return url
840 host, path = splithost(path)
841 type, basepath = splittype(base) # inherit type from base
842 if host:
843 # if url contains host, just inherit type
844 if type: return type + '://' + host + path
845 else:
846 # no type inherited, so url must have started with //
847 # just return it
848 return url
849 host, basepath = splithost(basepath) # inherit host
850 basepath, basetag = splittag(basepath) # remove extraneous cruft
851 basepath, basequery = splitquery(basepath) # idem
852 if path[:1] != '/':
853 # non-absolute path name
854 if path[:1] in ('#', '?'):
855 # path is just a tag or query, attach to basepath
856 i = len(basepath)
857 else:
858 # else replace last component
859 i = basepath.rfind('/')
860 if i < 0:
861 # basepath not absolute
862 if host:
863 # host present, make absolute
864 basepath = '/'
865 else:
866 # else keep non-absolute
867 basepath = ''
868 else:
869 # remove last file component
870 basepath = basepath[:i+1]
871 # Interpret ../ (important because of symlinks)
872 while basepath and path[:3] == '../':
873 path = path[3:]
874 i = basepath[:-1].rfind('/')
875 if i > 0:
876 basepath = basepath[:i+1]
877 elif i == 0:
878 basepath = '/'
879 break
880 else:
881 basepath = ''
883 path = basepath + path
884 if host and path and path[0] != '/':
885 path = '/' + path
886 if type and host: return type + '://' + host + path
887 elif type: return type + ':' + path
888 elif host: return '//' + host + path # don't know what this means
889 else: return path
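# Sample results of basejoin() (hostnames are hypothetical):
#     basejoin('http://host/a/b.html', 'c.html')        --> 'http://host/a/c.html'
#     basejoin('http://host/a/b.html', '/c.html')       --> 'http://host/c.html'
#     basejoin('http://host/a/b.html', '../c.html')     --> 'http://host/c.html'
#     basejoin('http://host/a/b.html', 'ftp://other/x') --> 'ftp://other/x'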
892 # Utilities to parse URLs (most of these return None for missing parts):
893 # unwrap('<URL:type://host/path>') --> 'type://host/path'
894 # splittype('type:opaquestring') --> 'type', 'opaquestring'
895 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
896 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
897 # splitpasswd('user:passwd') -> 'user', 'passwd'
898 # splitport('host:port') --> 'host', 'port'
899 # splitquery('/path?query') --> '/path', 'query'
900 # splittag('/path#tag') --> '/path', 'tag'
901 # splitattr('/path;attr1=value1;attr2=value2;...') ->
902 # '/path', ['attr1=value1', 'attr2=value2', ...]
903 # splitvalue('attr=value') --> 'attr', 'value'
904 # splitgophertype('/Xselector') --> 'X', 'selector'
905 # unquote('abc%20def') -> 'abc def'
906 # quote('abc def') -> 'abc%20def')
908 try:
909 unicode
910 except NameError:
911 def _is_unicode(x):
912 return 0
913 else:
914 def _is_unicode(x):
915 return isinstance(x, unicode)
917 def toBytes(url):
918 """toBytes(u"URL") --> 'URL'."""
919 # Most URL schemes require ASCII. If that changes, the conversion
920 # can be relaxed
921 if _is_unicode(url):
922 try:
923 url = url.encode("ASCII")
924 except UnicodeError:
925 raise UnicodeError("URL " + repr(url) +
926 " contains non-ASCII characters")
927 return url
929 def unwrap(url):
930 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
931 url = url.strip()
932 if url[:1] == '<' and url[-1:] == '>':
933 url = url[1:-1].strip()
934 if url[:4] == 'URL:': url = url[4:].strip()
935 return url
937 _typeprog = None
938 def splittype(url):
939 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
940 global _typeprog
941 if _typeprog is None:
942 import re
943 _typeprog = re.compile('^([^/:]+):')
945 match = _typeprog.match(url)
946 if match:
947 scheme = match.group(1)
948 return scheme.lower(), url[len(scheme) + 1:]
949 return None, url
951 _hostprog = None
952 def splithost(url):
953 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
954 global _hostprog
955 if _hostprog is None:
956 import re
957 _hostprog = re.compile('^//([^/]*)(.*)$')
959 match = _hostprog.match(url)
960 if match: return match.group(1, 2)
961 return None, url
963 _userprog = None
964 def splituser(host):
965 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
966 global _userprog
967 if _userprog is None:
968 import re
969 _userprog = re.compile('^([^@]*)@(.*)$')
971 match = _userprog.match(host)
972 if match: return map(unquote, match.group(1, 2))
973 return None, host
975 _passwdprog = None
976 def splitpasswd(user):
977 """splitpasswd('user:passwd') -> 'user', 'passwd'."""
978 global _passwdprog
979 if _passwdprog is None:
980 import re
981 _passwdprog = re.compile('^([^:]*):(.*)$')
983 match = _passwdprog.match(user)
984 if match: return match.group(1, 2)
985 return user, None
987 # splittag('/path#tag') --> '/path', 'tag'
988 _portprog = None
989 def splitport(host):
990 """splitport('host:port') --> 'host', 'port'."""
991 global _portprog
992 if _portprog is None:
993 import re
994 _portprog = re.compile('^(.*):([0-9]+)$')
996 match = _portprog.match(host)
997 if match: return match.group(1, 2)
998 return host, None
1000 _nportprog = None
1001 def splitnport(host, defport=-1):
1002 """Split host and port, returning numeric port.
1003 Return given default port if no ':' found; defaults to -1.
1004 Return numerical port if a valid number is found after ':'.
1005 Return None if ':' but not a valid number."""
1006 global _nportprog
1007 if _nportprog is None:
1008 import re
1009 _nportprog = re.compile('^(.*):(.*)$')
1011 match = _nportprog.match(host)
1012 if match:
1013 host, port = match.group(1, 2)
1014 try:
1015 if not port: raise ValueError, "no digits"
1016 nport = int(port)
1017 except ValueError:
1018 nport = None
1019 return host, nport
1020 return host, defport
1022 _queryprog = None
1023 def splitquery(url):
1024 """splitquery('/path?query') --> '/path', 'query'."""
1025 global _queryprog
1026 if _queryprog is None:
1027 import re
1028 _queryprog = re.compile('^(.*)\?([^?]*)$')
1030 match = _queryprog.match(url)
1031 if match: return match.group(1, 2)
1032 return url, None
1034 _tagprog = None
1035 def splittag(url):
1036 """splittag('/path#tag') --> '/path', 'tag'."""
1037 global _tagprog
1038 if _tagprog is None:
1039 import re
1040 _tagprog = re.compile('^(.*)#([^#]*)$')
1042 match = _tagprog.match(url)
1043 if match: return match.group(1, 2)
1044 return url, None
1046 def splitattr(url):
1047 """splitattr('/path;attr1=value1;attr2=value2;...') ->
1048 '/path', ['attr1=value1', 'attr2=value2', ...]."""
1049 words = url.split(';')
1050 return words[0], words[1:]
1052 _valueprog = None
1053 def splitvalue(attr):
1054 """splitvalue('attr=value') --> 'attr', 'value'."""
1055 global _valueprog
1056 if _valueprog is None:
1057 import re
1058 _valueprog = re.compile('^([^=]*)=(.*)$')
1060 match = _valueprog.match(attr)
1061 if match: return match.group(1, 2)
1062 return attr, None
1064 def splitgophertype(selector):
1065 """splitgophertype('/Xselector') --> 'X', 'selector'."""
1066 if selector[:1] == '/' and selector[1:2]:
1067 return selector[1], selector[2:]
1068 return None, selector
1070 def unquote(s):
1071 """unquote('abc%20def') -> 'abc def'."""
1072 mychr = chr
1073 myatoi = int
1074 list = s.split('%')
1075 res = [list[0]]
1076 myappend = res.append
1077 del list[0]
1078 for item in list:
1079 if item[1:2]:
1080 try:
1081 myappend(mychr(myatoi(item[:2], 16))
1082 + item[2:])
1083 except ValueError:
1084 myappend('%' + item)
1085 else:
1086 myappend('%' + item)
1087 return "".join(res)
1089 def unquote_plus(s):
1090 """unquote_plus('%7e/abc+def') -> '~/abc def'"""
1091 if '+' in s:
1092 # replace '+' with ' '
1093 s = ' '.join(s.split('+'))
1094 return unquote(s)
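# Sample results:
#     unquote('abc%20def')        --> 'abc def'
#     unquote('100%')             --> '100%'  (malformed escapes pass through)
#     unquote_plus('abc+def%21')  --> 'abc def!'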
1096 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1097 'abcdefghijklmnopqrstuvwxyz'
1098 '0123456789' '_.-')
1100 _fast_safe_test = always_safe + '/'
1101 _fast_safe = None
1103 def _fast_quote(s):
1104 global _fast_safe
1105 if _fast_safe is None:
1106 _fast_safe = {}
1107 for c in _fast_safe_test:
1108 _fast_safe[c] = c
1109 res = list(s)
1110 for i in range(len(res)):
1111 c = res[i]
1112 if not c in _fast_safe:
1113 res[i] = '%%%02X' % ord(c)
1114 return ''.join(res)
1116 def quote(s, safe = '/'):
1117 """quote('abc def') -> 'abc%20def'
1119 Each part of a URL, e.g. the path info, the query, etc., has a
1120 different set of reserved characters that must be quoted.
1122 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1123 the following reserved characters.
1125 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1126 "$" | ","
1128 Each of these characters is reserved in some component of a URL,
1129 but not necessarily in all of them.
1131 By default, the quote function is intended for quoting the path
1132 section of a URL. Thus, it will not encode '/'. This character
1133 is reserved, but in typical usage the quote function is being
1134 called on a path where the existing slash characters are used as
1135 reserved characters."""
1137 safe = always_safe + safe
1138 if _fast_safe_test == safe:
1139 return _fast_quote(s)
1140 res = list(s)
1141 for i in range(len(res)):
1142 c = res[i]
1143 if c not in safe:
1144 res[i] = '%%%02X' % ord(c)
1145 return ''.join(res)
1147 def quote_plus(s, safe = ''):
1148 """Quote the query fragment of a URL, replacing ' ' with '+'"""
1149 if ' ' in s:
1150 l = s.split(' ')
1151 for i in range(len(l)):
1152 l[i] = quote(l[i], safe)
1153 return '+'.join(l)
1154 else:
1155 return quote(s, safe)
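# Sample results:
#     quote('/~user/file name.txt')  --> '/%7Euser/file%20name.txt'
#     quote('/~user/x', safe='')     --> '%2F%7Euser%2Fx'
#     quote_plus('a b & c')          --> 'a+b+%26+c'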
1157 def urlencode(query,doseq=0):
1158 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1160 If any values in the query arg are sequences and doseq is true, each
1161 sequence element is converted to a separate parameter.
1163 If the query arg is a sequence of two-element tuples, the order of the
1164 parameters in the output will match the order of parameters in the
1165 input."""
1168 if hasattr(query,"items"):
1169 # mapping objects
1170 query = query.items()
1171 else:
1172 # it's a bother at times that strings and string-like objects are
1173 # sequences...
1174 try:
1175 # non-sequence items should not work with len()
1176 # non-empty strings will fail this
1177 if len(query) and not isinstance(query[0], tuple):
1178 raise TypeError
1179 # zero-length sequences of all types will get here and succeed,
1180 # but that's a minor nit - since the original implementation
1181 # allowed empty dicts that type of behavior probably should be
1182 # preserved for consistency
1183 except TypeError:
1184 ty,va,tb = sys.exc_info()
1185 raise TypeError, "not a valid non-string sequence or mapping object", tb
1187 l = []
1188 if not doseq:
1189 # preserve old behavior
1190 for k, v in query:
1191 k = quote_plus(str(k))
1192 v = quote_plus(str(v))
1193 l.append(k + '=' + v)
1194 else:
1195 for k, v in query:
1196 k = quote_plus(str(k))
1197 if isinstance(v, str):
1198 v = quote_plus(v)
1199 l.append(k + '=' + v)
1200 elif _is_unicode(v):
1201 # is there a reasonable way to convert to ASCII?
1202 # encode generates a string, but "replace" or "ignore"
1203 # lose information and "strict" can raise UnicodeError
1204 v = quote_plus(v.encode("ASCII","replace"))
1205 l.append(k + '=' + v)
1206 else:
1207 try:
1208 # is this a sufficient test for sequence-ness?
1209 x = len(v)
1210 except TypeError:
1211 # not a sequence
1212 v = quote_plus(str(v))
1213 l.append(k + '=' + v)
1214 else:
1215 # loop over the sequence
1216 for elt in v:
1217 l.append(k + '=' + quote_plus(str(elt)))
1218 return '&'.join(l)
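# Sample results of urlencode() (note that the ordering of items taken from a
# dictionary is undefined):
#     urlencode([('q', 'spam & eggs'), ('n', 2)])  --> 'q=spam+%26+eggs&n=2'
#     urlencode({'q': ['spam', 'eggs']}, doseq=1)  --> 'q=spam&q=eggs'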
1220 # Proxy handling
1221 def getproxies_environment():
1222 """Return a dictionary of scheme -> proxy server URL mappings.
1224 Scan the environment for variables named <scheme>_proxy;
1225 this seems to be the standard convention. If you need a
1226 different way, you can pass a proxies dictionary to the
1227 [Fancy]URLopener constructor."""
1230 proxies = {}
1231 for name, value in os.environ.items():
1232 name = name.lower()
1233 if value and name[-6:] == '_proxy':
1234 proxies[name[:-6]] = value
1235 return proxies
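# Usage sketch: with a hypothetical proxy configured in the environment, e.g.
#
#     http_proxy=http://proxy.example.com:3128/
#     ftp_proxy=http://proxy.example.com:3128/
#
# getproxies_environment() returns
#
#     {'http': 'http://proxy.example.com:3128/',
#      'ftp': 'http://proxy.example.com:3128/'}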
1237 if os.name == 'mac':
1238 def getproxies():
1239 """Return a dictionary of scheme -> proxy server URL mappings.
1241 By convention the mac uses Internet Config to store
1242 proxies. An HTTP proxy, for instance, is stored under
1243 the HttpProxy key."""
1246 try:
1247 import ic
1248 except ImportError:
1249 return {}
1251 try:
1252 config = ic.IC()
1253 except ic.error:
1254 return {}
1255 proxies = {}
1256 # HTTP:
1257 if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1258 try:
1259 value = config['HTTPProxyHost']
1260 except ic.error:
1261 pass
1262 else:
1263 proxies['http'] = 'http://%s' % value
1264 # FTP: XXXX To be done.
1265 # Gopher: XXXX To be done.
1266 return proxies
1268 def proxy_bypass(x):
1269 return 0
1271 elif os.name == 'nt':
1272 def getproxies_registry():
1273 """Return a dictionary of scheme -> proxy server URL mappings.
1275 Win32 uses the registry to store proxies."""
1278 proxies = {}
1279 try:
1280 import _winreg
1281 except ImportError:
1282 # Std module, so should be around - but you never know!
1283 return proxies
1284 try:
1285 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1286 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1287 proxyEnable = _winreg.QueryValueEx(internetSettings,
1288 'ProxyEnable')[0]
1289 if proxyEnable:
1290 # Returned as Unicode but problems if not converted to ASCII
1291 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1292 'ProxyServer')[0])
1293 if '=' in proxyServer:
1294 # Per-protocol settings
1295 for p in proxyServer.split(';'):
1296 protocol, address = p.split('=', 1)
1297 # See if address has a type:// prefix
1298 import re
1299 if not re.match('^([^/:]+)://', address):
1300 address = '%s://%s' % (protocol, address)
1301 proxies[protocol] = address
1302 else:
1303 # Use one setting for all protocols
1304 if proxyServer[:5] == 'http:':
1305 proxies['http'] = proxyServer
1306 else:
1307 proxies['http'] = 'http://%s' % proxyServer
1308 proxies['ftp'] = 'ftp://%s' % proxyServer
1309 internetSettings.Close()
1310 except (WindowsError, ValueError, TypeError):
1311 # Either registry key not found etc, or the value in an
1312 # unexpected format.
1313 # proxies already set up to be empty so nothing to do
1314 pass
1315 return proxies
1317 def getproxies():
1318 """Return a dictionary of scheme -> proxy server URL mappings.
1320 Returns settings gathered from the environment, if specified,
1321 or the registry."""
1324 return getproxies_environment() or getproxies_registry()
1326 def proxy_bypass(host):
1327 try:
1328 import _winreg
1329 import re
1330 except ImportError:
1331 # Std modules, so should be around - but you never know!
1332 return 0
1333 try:
1334 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1335 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1336 proxyEnable = _winreg.QueryValueEx(internetSettings,
1337 'ProxyEnable')[0]
1338 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1339 'ProxyOverride')[0])
1340 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1341 except WindowsError:
1342 return 0
1343 if not proxyEnable or not proxyOverride:
1344 return 0
1345 # try to make a host list from name and IP address.
1346 host = [host]
1347 try:
1348 addr = socket.gethostbyname(host[0])
1349 if addr != host:
1350 host.append(addr)
1351 except socket.error:
1352 pass
1353 # make a check value list from the registry entry: replace the
1354 # '<local>' string by the localhost entry and the corresponding
1355 # canonical entry.
1356 proxyOverride = proxyOverride.split(';')
1357 i = 0
1358 while i < len(proxyOverride):
1359 if proxyOverride[i] == '<local>':
1360 proxyOverride[i:i+1] = ['localhost',
1361 '127.0.0.1',
1362 socket.gethostname(),
1363 socket.gethostbyname(
1364 socket.gethostname())]
1365 i += 1
1366 # print proxyOverride
1367 # now check if we match one of the registry values.
1368 for test in proxyOverride:
1369 test = test.replace(".", r"\.") # mask dots
1370 test = test.replace("*", r".*") # change glob sequence
1371 test = test.replace("?", r".") # change glob char
1372 for val in host:
1373 # print "%s <--> %s" %( test, val )
1374 if re.match(test, val, re.I):
1375 return 1
1376 return 0
1378 else:
1379 # By default use environment variables
1380 getproxies = getproxies_environment
1382 def proxy_bypass(host):
1383 return 0
1385 # Test and time quote() and unquote()
1386 def test1():
1387 s = ''
1388 for i in range(256): s = s + chr(i)
1389 s = s*4
1390 t0 = time.time()
1391 qs = quote(s)
1392 uqs = unquote(qs)
1393 t1 = time.time()
1394 if uqs != s:
1395 print 'Wrong!'
1396 print `s`
1397 print `qs`
1398 print `uqs`
1399 print round(t1 - t0, 3), 'sec'
1402 def reporthook(blocknum, blocksize, totalsize):
1403 # Report during remote transfers
1404 print "Block number: %d, Block size: %d, Total size: %d" % (
1405 blocknum, blocksize, totalsize)
1407 # Test program
1408 def test(args=[]):
1409 if not args:
1410 args = [
1411 '/etc/passwd',
1412 'file:/etc/passwd',
1413 'file://localhost/etc/passwd',
1414 'ftp://ftp.python.org/pub/python/README',
1415 ## 'gopher://gopher.micro.umn.edu/1/',
1416 'http://www.python.org/index.html',
1417 ]
1418 if hasattr(URLopener, "open_https"):
1419 args.append('https://synergy.as.cmu.edu/~geek/')
1420 try:
1421 for url in args:
1422 print '-'*10, url, '-'*10
1423 fn, h = urlretrieve(url, None, reporthook)
1424 print fn
1425 if h:
1426 print '======'
1427 for k in h.keys(): print k + ':', h[k]
1428 print '======'
1429 fp = open(fn, 'rb')
1430 data = fp.read()
1431 del fp
1432 if '\r' in data:
1433 table = string.maketrans("", "")
1434 data = data.translate(table, "\r")
1435 print data
1436 fn, h = None, None
1437 print '-'*40
1438 finally:
1439 urlcleanup()
1441 def main():
1442 import getopt, sys
1443 try:
1444 opts, args = getopt.getopt(sys.argv[1:], "th")
1445 except getopt.error, msg:
1446 print msg
1447 print "Use -h for help"
1448 return
1449 t = 0
1450 for o, a in opts:
1451 if o == '-t':
1452 t = t + 1
1453 if o == '-h':
1454 print "Usage: python urllib.py [-t] [url ...]"
1455 print "-t runs self-test;",
1456 print "otherwise, contents of urls are printed"
1457 return
1458 if t:
1459 if t > 1:
1460 test1()
1461 test(args)
1462 else:
1463 if not args:
1464 print "Use -h for help"
1465 for url in args:
1466 print urlopen(url).read(),
1468 # Run test program when run as a script
1469 if __name__ == '__main__':
1470 main()