Lib/httplib.py

   1 """HTTP/1.1 client library
   2
   3 <intro stuff goes here>
   4 <other stuff, too>
   5
   6 HTTPConnection goes through a number of "states", which define when a client
   7 may legally make another request or fetch the response for a particular
   8 request. This diagram details these state transitions:
   9
  10     (null)
  11       |
  12       | HTTPConnection()
  13       v
  14     Idle
  15       |
  16       | putrequest()
  17       v
  18     Request-started
  19       |
  20       | ( putheader() )*  endheaders()
  21       v
  22     Request-sent
  23       |
  24       | response = getresponse()
  25       v
  26     Unread-response   [Response-headers-read]
  27       |\____________________
  28       |                     |
  29       | response.read()     | putrequest()
  30       v                     v
  31     Idle                  Req-started-unread-response
  32                      ______/|
  33                    /        |
  34    response.read() |        | ( putheader() )*  endheaders()
  35                    v        v
  36        Request-started    Req-sent-unread-response
  37                             |
  38                             | response.read()
  39                             v
  40                           Request-sent
  41
  42 This diagram presents the following rules:
  43   -- a second request may not be started until {response-headers-read}
  44   -- a response [object] cannot be retrieved until {request-sent}
  45   -- there is no differentiation between an unread response body and a
  46      partially read response body
  47
  48 Note: this enforcement is applied by the HTTPConnection class. The
  49       HTTPResponse class does not enforce this state machine, which
  50       implies sophisticated clients may accelerate the request/response
  51       pipeline. Caution should be taken, though: accelerating the states
  52       beyond the above pattern may imply knowledge of the server's
  53       connection-close behavior for certain requests. For example, it
  54       is impossible to tell whether the server will close the connection
  55       UNTIL the response headers have been read; this means that further
  56       requests cannot be placed into the pipeline until it is known that
  57       the server will NOT be closing the connection.
  58
  59 Logical State                  __state            __response
  60 -------------                  -------            ----------
  61 Idle                           _CS_IDLE           None
  62 Request-started                _CS_REQ_STARTED    None
  63 Request-sent                   _CS_REQ_SENT       None
  64 Unread-response                _CS_IDLE           <response_class>
  65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
  66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  67 """
  68
  69 from array import array
  70 import socket
  71 from sys import py3kwarning
  72 from urlparse import urlsplit
  73 import warnings
  74 with warnings.catch_warnings():
  75     if py3kwarning:
  76         warnings.filterwarnings("ignore", ".*mimetools has been removed",
  77                                 DeprecationWarning)
  78     import mimetools
  79
  80 try:
  81     from cStringIO import StringIO
  82 except ImportError:
  83     from StringIO import StringIO
  84
# Names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default TCP ports; HTTP_PORT is HTTPConnection.default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder held by HTTPResponse attributes (version, status, reason,
# chunked, length, ...) until the response has actually been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection.__state; see the state table in the module
# docstring for how they combine with HTTPConnection.__response)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
 101
# status codes
# Symbolic names for HTTP status codes, grouped by class (1xx .. 5xx).
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names
# NOTE: this table is not in one-to-one correspondence with the constants
# above: 102, 207, 226, 422, 423, 424, 426, 507 and 510 have a constant but
# no entry here, while 306 has an entry but no constant.  Use
# responses.get(code) when the code may be one of those.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read
# (1048576 bytes == 1 MiB; larger requests are split into several reads)
MAXAMOUNT = 1048576
 213
 214 class HTTPMessage(mimetools.Message):
 215
 216     def addheader(self, key, value):
 217         """Add header for field key handling repeats."""
 218         prev = self.dict.get(key)
 219         if prev is None:
 220             self.dict[key] = value
 221         else:
 222             combined = ", ".join((prev, value))
 223             self.dict[key] = combined
 224
 225     def addcontinue(self, key, more):
 226         """Add more field data from a continuation line."""
 227         prev = self.dict[key]
 228         self.dict[key] = prev + "\n " + more
 229
    def readheaders(self):
        """Read header lines.

        Read header lines from self.fp up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the headers is
        consumed, but not stored in self.headers.  If a non-header line ends
        the headers (which is an error), an attempt is made to push it back
        (via fp.unread() or fp.seek()); it is never stored in self.headers.

        Nothing is returned; the results are left on the instance:

        self.status is set to the empty string if all went well, otherwise
        it is an error message.  self.headers is a completely uninterpreted
        list of the header lines as read (so printing them will reproduce
        the header exactly as it appears in the file).  self.dict holds the
        parsed values, stored through addheader() and addcontinue().

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = hlist = []
        self.status = ''
        headerseen = ""     # key of the most recent header, '' if none yet
        firstline = 1
        # Pick a way to push back a non-header line: fp.unread() if the file
        # offers it, otherwise tell()/seek() when the file is seekable.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    # remember where this line starts so we can seek back
                    startofline = tell()
                except IOError:
                    # tell() is not usable after all; give up on seeking
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                hlist.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            # isheader() is not defined in this class (presumably inherited
            # from the rfc822 base class); its result is used both as a
            # truth value and as the key under which the value is stored.
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                hlist.append(line)
                # the value is what follows the name and the ':' separator
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
 318
 319 class HTTPResponse:
 320
 321     # strict: If true, raise BadStatusLine if the status line can't be
 322     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
 323     # false because it prevents clients from talking to HTTP/0.9
 324     # servers.  Note that a response with a sufficiently corrupted
 325     # status line will look like an HTTP/0.9 response.
 326
 327     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 328
 329     def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
 330         if buffering:
 331             # The caller won't be using any sock.recv() calls, so buffering
 332             # is fine and recommended for performance.
 333             self.fp = sock.makefile('rb')
 334         else:
 335             # The buffer size is specified as zero, because the headers of
 336             # the response are read with readline().  If the reads were
 337             # buffered the readline() calls could consume some of the
 338             # response, which make be read via a recv() on the underlying
 339             # socket.
 340             self.fp = sock.makefile('rb', 0)
 341         self.debuglevel = debuglevel
 342         self.strict = strict
 343         self._method = method
 344
 345         self.msg = None
 346
 347         # from the Status-Line of the response
 348         self.version = _UNKNOWN # HTTP-Version
 349         self.status = _UNKNOWN  # Status-Code
 350         self.reason = _UNKNOWN  # Reason-Phrase
 351
 352         self.chunked = _UNKNOWN         # is "chunked" being used?
 353         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
 354         self.length = _UNKNOWN          # number of bytes left in response
 355         self.will_close = _UNKNOWN      # conn will close at end of response
 356
 357     def _read_status(self):
 358         # Initialize with Simple-Response defaults
 359         line = self.fp.readline()
 360         if self.debuglevel > 0:
 361             print "reply:", repr(line)
 362         if not line:
 363             # Presumably, the server closed the connection before
 364             # sending a valid response.
 365             raise BadStatusLine(line)
 366         try:
 367             [version, status, reason] = line.split(None, 2)
 368         except ValueError:
 369             try:
 370                 [version, status] = line.split(None, 1)
 371                 reason = ""
 372             except ValueError:
 373                 # empty version will cause next test to fail and status
 374                 # will be treated as 0.9 response.
 375                 version = ""
 376         if not version.startswith('HTTP/'):
 377             if self.strict:
 378                 self.close()
 379                 raise BadStatusLine(line)
 380             else:
 381                 # assume it's a Simple-Response from an 0.9 server
 382                 self.fp = LineAndFileWrapper(line, self.fp)
 383                 return "HTTP/0.9", 200, ""
 384
 385         # The status code is a three-digit number
 386         try:
 387             status = int(status)
 388             if status < 100 or status > 999:
 389                 raise BadStatusLine(line)
 390         except ValueError:
 391             raise BadStatusLine(line)
 392         return version, status, reason
 393
    def begin(self):
        """Read the status line and headers and set up the body framing.

        Does nothing if the headers were already read (self.msg is set).
        Otherwise any "100 Continue" responses are skipped, then
        self.status, self.reason, self.version (9, 10 or 11), self.msg,
        self.chunked, self.length and self.will_close are filled in.

        Raises UnknownProtocol if the version string is not HTTP/0.9 or
        HTTP/1.x; exceptions from _read_status() propagate unchanged.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                # an empty result means a blank line (or EOF): end of headers
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print "header:", skip

        self.status = status
        self.reason = reason.strip()
        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A Simple-Response has no headers: the body runs until the
            # connection closes, so hand out an empty message.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(StringIO())
            return

        # second argument is the "seekable" flag: the socket file is not
        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print "header:", hdr,

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                # unparsable Content-Length: treat the length as unknown
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1
 476
 477     def _check_close(self):
 478         conn = self.msg.getheader('connection')
 479         if self.version == 11:
 480             # An HTTP/1.1 proxy is assumed to stay open unless
 481             # explicitly closed.
 482             conn = self.msg.getheader('connection')
 483             if conn and "close" in conn.lower():
 484                 return True
 485             return False
 486
 487         # Some HTTP/1.0 implementations have support for persistent
 488         # connections, using rules different than HTTP/1.1.
 489
 490         # For older HTTP, Keep-Alive indicates persistent connection.
 491         if self.msg.getheader('keep-alive'):
 492             return False
 493
 494         # At least Akamai returns a "Connection: Keep-Alive" header,
 495         # which was supposed to be sent by the client.
 496         if conn and "keep-alive" in conn.lower():
 497             return False
 498
 499         # Proxy-Connection is a netscape hack.
 500         pconn = self.msg.getheader('proxy-connection')
 501         if pconn and "keep-alive" in pconn.lower():
 502             return False
 503
 504         # otherwise, assume it will close
 505         return True
 506
 507     def close(self):
 508         if self.fp:
 509             self.fp.close()
 510             self.fp = None
 511
 512     def isclosed(self):
 513         # NOTE: it is possible that we will not ever call self.close(). This
 514         #       case occurs when will_close is TRUE, length is None, and we
 515         #       read up to the last byte, but NOT past it.
 516         #
 517         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
 518         #          called, meaning self.isclosed() is meaningful.
 519         return self.fp is None
 520
 521     # XXX It would be nice to have readline and __iter__ for this, too.
 522
 523     def read(self, amt=None):
 524         if self.fp is None:
 525             return ''
 526
 527         if self._method == 'HEAD':
 528             self.close()
 529             return ''
 530
 531         if self.chunked:
 532             return self._read_chunked(amt)
 533
 534         if amt is None:
 535             # unbounded read
 536             if self.length is None:
 537                 s = self.fp.read()
 538             else:
 539                 s = self._safe_read(self.length)
 540                 self.length = 0
 541             self.close()        # we read everything
 542             return s
 543
 544         if self.length is not None:
 545             if amt > self.length:
 546                 # clip the read to the "end of response"
 547                 amt = self.length
 548
 549         # we do not use _safe_read() here because this may be a .will_close
 550         # connection, and the user is reading more bytes than will be provided
 551         # (for example, reading in 1k chunks)
 552         s = self.fp.read(amt)
 553         if self.length is not None:
 554             self.length -= len(s)
 555             if not self.length:
 556                 self.close()
 557         return s
 558
    def _read_chunked(self, amt):
        """Read from a body sent with "chunked" transfer encoding.

        amt -- maximum number of bytes to return, or None to read every
               remaining chunk.

        Returns the decoded data with the chunk framing removed.  When a
        bounded read stops inside a chunk, the number of bytes still unread
        in that chunk is remembered in self.chunk_left for the next call.
        After the terminating zero-size chunk the trailer is discarded and
        the response is closed.

        Raises IncompleteRead, after closing the response, if a chunk-size
        line cannot be parsed as a hexadecimal number; the exception carries
        the data collected so far in this call.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left    # None: a new chunk-size line is next
        value = []                      # pieces of data collected so far
        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(''.join(value))
                if chunk_left == 0:
                    # last-chunk marker: only the trailer remains
                    break
            if amt is None:
                # unbounded read: take the whole chunk and keep looping
                value.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                # request ends inside this chunk: remember what is left
                value.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(value)
            elif amt == chunk_left:
                value.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(value)
            else:
                # request extends past this chunk: consume it and go on
                value.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(value)
 612
 613     def _safe_read(self, amt):
 614         """Read the number of bytes requested, compensating for partial reads.
 615
 616         Normally, we have a blocking socket, but a read() can be interrupted
 617         by a signal (resulting in a partial read).
 618
 619         Note that we cannot distinguish between EOF and an interrupt when zero
 620         bytes have been read. IncompleteRead() will be raised in this
 621         situation.
 622
 623         This function should be used when <amt> bytes "should" be present for
 624         reading. If the bytes are truly not available (due to EOF), then the
 625         IncompleteRead exception can be used to detect the problem.
 626         """
 627         # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
 628         # return less than x bytes unless EOF is encountered.  It now handles
 629         # signal interruptions (socket.error EINTR) internally.  This code
 630         # never caught that exception anyways.  It seems largely pointless.
 631         # self.fp.read(amt) will work fine.
 632         s = []
 633         while amt > 0:
 634             chunk = self.fp.read(min(amt, MAXAMOUNT))
 635             if not chunk:
 636                 raise IncompleteRead(''.join(s), amt)
 637             s.append(chunk)
 638             amt -= len(chunk)
 639         return ''.join(s)
 640
 641     def getheader(self, name, default=None):
 642         if self.msg is None:
 643             raise ResponseNotReady()
 644         return self.msg.getheader(name, default)
 645
 646     def getheaders(self):
 647         """Return list of (header, value) tuples."""
 648         if self.msg is None:
 649             raise ResponseNotReady()
 650         return self.msg.items()
 651
 652
 653 class HTTPConnection:
 654
 655     _http_vsn = 11
 656     _http_vsn_str = 'HTTP/1.1'
 657
 658     response_class = HTTPResponse
 659     default_port = HTTP_PORT
 660     auto_open = 1
 661     debuglevel = 0
 662     strict = 0
 663
 664     def __init__(self, host, port=None, strict=None,
 665                  timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
 666         self.timeout = timeout
 667         self.source_address = source_address
 668         self.sock = None
 669         self._buffer = []
 670         self.__response = None
 671         self.__state = _CS_IDLE
 672         self._method = None
 673         self._tunnel_host = None
 674         self._tunnel_port = None
 675         self._tunnel_headers = {}
 676
 677         self._set_hostport(host, port)
 678         if strict is not None:
 679             self.strict = strict
 680
 681     def set_tunnel(self, host, port=None, headers=None):
 682         """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
 683
 684         The headers argument should be a mapping of extra HTTP headers
 685         to send with the CONNECT request.
 686         """
 687         self._tunnel_host = host
 688         self._tunnel_port = port
 689         if headers:
 690             self._tunnel_headers = headers
 691         else:
 692             self._tunnel_headers.clear()
 693
 694     def _set_hostport(self, host, port):
 695         if port is None:
 696             i = host.rfind(':')
 697             j = host.rfind(']')         # ipv6 addresses have [...]
 698             if i > j:
 699                 try:
 700                     port = int(host[i+1:])
 701                 except ValueError:
 702                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 703                 host = host[:i]
 704             else:
 705                 port = self.default_port
 706             if host and host[0] == '[' and host[-1] == ']':
 707                 host = host[1:-1]
 708         self.host = host
 709         self.port = port
 710
 711     def set_debuglevel(self, level):
 712         self.debuglevel = level
 713
 714     def _tunnel(self):
 715         self._set_hostport(self._tunnel_host, self._tunnel_port)
 716         self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
 717         for header, value in self._tunnel_headers.iteritems():
 718             self.send("%s: %s\r\n" % (header, value))
 719         self.send("\r\n")
 720         response = self.response_class(self.sock, strict = self.strict,
 721                                        method = self._method)
 722         (version, code, message) = response._read_status()
 723
 724         if code != 200:
 725             self.close()
 726             raise socket.error("Tunnel connection failed: %d %s" % (code,
 727                                                                     message.strip()))
 728         while True:
 729             line = response.fp.readline()
 730             if line == '\r\n': break
 731
 732
 733     def connect(self):
 734         """Connect to the host and port specified in __init__."""
 735         self.sock = socket.create_connection((self.host,self.port),
 736                                              self.timeout, self.source_address)
 737
 738         if self._tunnel_host:
 739             self._tunnel()
 740
 741     def close(self):
 742         """Close the connection to the HTTP server."""
 743         if self.sock:
 744             self.sock.close()   # close it manually... there may be other refs
 745             self.sock = None
 746         if self.__response:
 747             self.__response.close()
 748             self.__response = None
 749         self.__state = _CS_IDLE
 750
 751     def send(self, str):
 752         """Send `str' to the server."""
 753         if self.sock is None:
 754             if self.auto_open:
 755                 self.connect()
 756             else:
 757                 raise NotConnected()
 758
 759         # send the data to the server. if we get a broken pipe, then close
 760         # the socket. we want to reconnect when somebody tries to send again.
 761         #
 762         # NOTE: we DO propagate the error, though, because we cannot simply
 763         #       ignore the error... the caller will know if they can retry.
 764         if self.debuglevel > 0:
 765             print "send:", repr(str)
 766         try:
 767             blocksize=8192
 768             if hasattr(str,'read') and not isinstance(str, array):
 769                 if self.debuglevel > 0: print "sendIng a read()able"
 770                 data=str.read(blocksize)
 771                 while data:
 772                     self.sock.sendall(data)
 773                     data=str.read(blocksize)
 774             else:
 775                 self.sock.sendall(str)
 776         except socket.error, v:
 777             if v.args[0] == 32:      # Broken pipe
 778                 self.close()
 779             raise
 780
 781     def _output(self, s):
 782         """Add a line of output to the current request buffer.
 783
 784         Assumes that the line does *not* end with \\r\\n.
 785         """
 786         self._buffer.append(s)
 787
 788     def _send_output(self, message_body=None):
 789         """Send the currently buffered request and clear the buffer.
 790
 791         Appends an extra \\r\\n to the buffer.
 792         A message_body may be specified, to be appended to the request.
 793         """
 794         self._buffer.extend(("", ""))
 795         msg = "\r\n".join(self._buffer)
 796         del self._buffer[:]
 797         # If msg and message_body are sent in a single send() call,
 798         # it will avoid performance problems caused by the interaction
 799         # between delayed ack and the Nagle algorithim.
 800         if isinstance(message_body, str):
 801             msg += message_body
 802             message_body = None
 803         self.send(msg)
 804         if message_body is not None:
 805             #message_body was not a string (i.e. it is a file) and
 806             #we must run the risk of Nagle
 807             self.send(message_body)
 808
 809     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
 810         """Send a request to the server.
 811
 812         `method' specifies an HTTP request method, e.g. 'GET'.
 813         `url' specifies the object being requested, e.g. '/index.html'.
 814         `skip_host' if True does not add automatically a 'Host:' header
 815         `skip_accept_encoding' if True does not add automatically an
 816            'Accept-Encoding:' header
 817         """
 818
 819         # if a prior response has been completed, then forget about it.
 820         if self.__response and self.__response.isclosed():
 821             self.__response = None
 822
 823
 824         # in certain cases, we cannot issue another request on this connection.
 825         # this occurs when:
 826         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
 827         #   2) a response to a previous request has signalled that it is going
 828         #      to close the connection upon completion.
 829         #   3) the headers for the previous response have not been read, thus
 830         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
 831         #
 832         # if there is no prior response, then we can request at will.
 833         #
 834         # if point (2) is true, then we will have passed the socket to the
 835         # response (effectively meaning, "there is no prior response"), and
 836         # will open a new one when a new request is made.
 837         #
 838         # Note: if a prior response exists, then we *can* start a new request.
 839         #       We are not allowed to begin fetching the response to this new
 840         #       request, however, until that prior response is complete.
 841         #
 842         if self.__state == _CS_IDLE:
 843             self.__state = _CS_REQ_STARTED
 844         else:
 845             raise CannotSendRequest()
 846
 847         # Save the method we use, we need it later in the response phase
 848         self._method = method
 849         if not url:
 850             url = '/'
 851         str = '%s %s %s' % (method, url, self._http_vsn_str)
 852
 853         self._output(str)
 854
 855         if self._http_vsn == 11:
 856             # Issue some standard headers for better HTTP/1.1 compliance
 857
 858             if not skip_host:
 859                 # this header is issued *only* for HTTP/1.1
 860                 # connections. more specifically, this means it is
 861                 # only issued when the client uses the new
 862                 # HTTPConnection() class. backwards-compat clients
 863                 # will be using HTTP/1.0 and those clients may be
 864                 # issuing this header themselves. we should NOT issue
 865                 # it twice; some web servers (such as Apache) barf
 866                 # when they see two Host: headers
 867
 868                 # If we need a non-standard port,include it in the
 869                 # header.  If the request is going through a proxy,
 870                 # but the host of the actual URL, not the host of the
 871                 # proxy.
 872
 873                 netloc = ''
 874                 if url.startswith('http'):
 875                     nil, netloc, nil, nil, nil = urlsplit(url)
 876
 877                 if netloc:
 878                     try:
 879                         netloc_enc = netloc.encode("ascii")
 880                     except UnicodeEncodeError:
 881                         netloc_enc = netloc.encode("idna")
 882                     self.putheader('Host', netloc_enc)
 883                 else:
 884                     try:
 885                         host_enc = self.host.encode("ascii")
 886                     except UnicodeEncodeError:
 887                         host_enc = self.host.encode("idna")
 888                     if self.port == self.default_port:
 889                         self.putheader('Host', host_enc)
 890                     else:
 891                         self.putheader('Host', "%s:%s" % (host_enc, self.port))
 892
 893             # note: we are assuming that clients will not attempt to set these
 894             #       headers since *this* library must deal with the
 895             #       consequences. this also means that when the supporting
 896             #       libraries are updated to recognize other forms, then this
 897             #       code should be changed (removed or updated).
 898
 899             # we only want a Content-Encoding of "identity" since we don't
 900             # support encodings such as x-gzip or x-deflate.
 901             if not skip_accept_encoding:
 902                 self.putheader('Accept-Encoding', 'identity')
 903
 904             # we can accept "chunked" Transfer-Encodings, but no others
 905             # NOTE: no TE header implies *only* "chunked"
 906             #self.putheader('TE', 'chunked')
 907
 908             # if TE is supplied in the header, then it must appear in a
 909             # Connection header.
 910             #self.putheader('Connection', 'TE')
 911
 912         else:
 913             # For HTTP/1.0, the server will assume "not chunked"
 914             pass
 915
 916     def putheader(self, header, *values):
 917         """Send a request header line to the server.
 918
 919         For example: h.putheader('Accept', 'text/html')
 920         """
 921         if self.__state != _CS_REQ_STARTED:
 922             raise CannotSendHeader()
 923
 924         str = '%s: %s' % (header, '\r\n\t'.join(values))
 925         self._output(str)
 926
 927     def endheaders(self, message_body=None):
 928         """Indicate that the last header line has been sent to the server.
 929
 930         This method sends the request to the server.  The optional
 931         message_body argument can be used to pass message body
 932         associated with the request.  The message body will be sent in
 933         the same packet as the message headers if possible.  The
 934         message_body should be a string.
 935         """
 936         if self.__state == _CS_REQ_STARTED:
 937             self.__state = _CS_REQ_SENT
 938         else:
 939             raise CannotSendHeader()
 940         self._send_output(message_body)
 941
 942     def request(self, method, url, body=None, headers={}):
 943         """Send a complete request to the server."""
 944
 945         try:
 946             self._send_request(method, url, body, headers)
 947         except socket.error, v:
 948             # trap 'Broken pipe' if we're allowed to automatically reconnect
 949             if v.args[0] != 32 or not self.auto_open:
 950                 raise
 951             # try one more time
 952             self._send_request(method, url, body, headers)
 953
 954     def _set_content_length(self, body):
 955         # Set the content-length based on the body.
 956         thelen = None
 957         try:
 958             thelen = str(len(body))
 959         except TypeError, te:
 960             # If this is a file-like object, try to
 961             # fstat its file descriptor
 962             import os
 963             try:
 964                 thelen = str(os.fstat(body.fileno()).st_size)
 965             except (AttributeError, OSError):
 966                 # Don't send a length if this failed
 967                 if self.debuglevel > 0: print "Cannot stat!!"
 968
 969         if thelen is not None:
 970             self.putheader('Content-Length', thelen)
 971
 972     def _send_request(self, method, url, body, headers):
 973         # honour explicitly requested Host: and Accept-Encoding headers
 974         header_names = dict.fromkeys([k.lower() for k in headers])
 975         skips = {}
 976         if 'host' in header_names:
 977             skips['skip_host'] = 1
 978         if 'accept-encoding' in header_names:
 979             skips['skip_accept_encoding'] = 1
 980
 981         self.putrequest(method, url, **skips)
 982
 983         if body and ('content-length' not in header_names):
 984             self._set_content_length(body)
 985         for hdr, value in headers.iteritems():
 986             self.putheader(hdr, value)
 987         self.endheaders(body)
 988
 989     def getresponse(self, buffering=False):
 990         "Get the response from the server."
 991
 992         # if a prior response has been completed, then forget about it.
 993         if self.__response and self.__response.isclosed():
 994             self.__response = None
 995
 996         #
 997         # if a prior response exists, then it must be completed (otherwise, we
 998         # cannot read this response's header to determine the connection-close
 999         # behavior)
1000         #
1001         # note: if a prior response existed, but was connection-close, then the
1002         # socket and response were made independent of this HTTPConnection
1003         # object since a new request requires that we open a whole new
1004         # connection
1005         #
1006         # this means the prior response had one of two states:
1007         #   1) will_close: this connection was reset and the prior socket and
1008         #                  response operate independently
1009         #   2) persistent: the response was retained and we await its
1010         #                  isclosed() status to become true.
1011         #
1012         if self.__state != _CS_REQ_SENT or self.__response:
1013             raise ResponseNotReady()
1014
1015         args = (self.sock,)
1016         kwds = {"strict":self.strict, "method":self._method}
1017         if self.debuglevel > 0:
1018             args += (self.debuglevel,)
1019         if buffering:
1020             #only add this keyword if non-default, for compatibility with
1021             #other response_classes.
1022             kwds["buffering"] = True;
1023         response = self.response_class(*args, **kwds)
1024
1025         response.begin()
1026         assert response.will_close != _UNKNOWN
1027         self.__state = _CS_IDLE
1028
1029         if response.will_close:
1030             # this effectively passes the connection to the response
1031             self.close()
1032         else:
1033             # remember this, so we can tell when it is complete
1034             self.__response = response
1035
1036         return response
1037
1038
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps an instance of _connection_class, which is switched to
    HTTP/1.0, and delegates send(), putrequest(), putheader(),
    endheaders() and set_debuglevel() to it.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        "Adopt conn as the wrapped connection and expose its methods."
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.putheader = conn.putheader
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # make the wrapped connection speak this class's protocol version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the status line cannot be parsed (BadStatusLine), the tuple is
        (-1, <the offending line>, None), the connection is closed, and
        getfile() returns a file object made from the socket.
        """
        try:
            if not buffering:
                response = self._conn.getresponse()
            else:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                response = self._conn.getresponse(buffering)
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            fp = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error.
            # close() also resets self.file, so the file object is published
            # only afterwards; otherwise getfile() could never return it.
            self.close()
            self.file = fp

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the wrapped connection and drop the reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1129
1130 try:
1131     import ssl
1132 except ImportError:
1133     pass
1134 else:
1135     class HTTPSConnection(HTTPConnection):
1136         "This class allows communication via SSL."
1137
1138         default_port = HTTPS_PORT
1139
1140         def __init__(self, host, port=None, key_file=None, cert_file=None,
1141                      strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1142                      source_address=None):
1143             HTTPConnection.__init__(self, host, port, strict, timeout,
1144                                     source_address)
1145             self.key_file = key_file
1146             self.cert_file = cert_file
1147
1148         def connect(self):
1149             "Connect to a host on a given (SSL) port."
1150
1151             sock = socket.create_connection((self.host, self.port),
1152                                             self.timeout, self.source_address)
1153             if self._tunnel_host:
1154                 self.sock = sock
1155                 self._tunnel()
1156             self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1157
1158     __all__.append("HTTPSConnection")
1159
1160     class HTTPS(HTTP):
1161         """Compatibility with 1.5 httplib interface
1162
1163         Python 1.5.2 did not have an HTTPS class, but it defined an
1164         interface for sending http requests that is also useful for
1165         https.
1166         """
1167
1168         _connection_class = HTTPSConnection
1169
1170         def __init__(self, host='', port=None, key_file=None, cert_file=None,
1171                      strict=None):
1172             # provide a default host, pass the X509 cert info
1173
1174             # urf. compensate for bad input.
1175             if port == 0:
1176                 port = None
1177             self._setup(self._connection_class(host, port, key_file,
1178                                                cert_file, strict))
1179
1180             # we never actually use these for anything, but we keep them
1181             # here for compatibility with post-1.5.2 CVS.
1182             self.key_file = key_file
1183             self.cert_file = cert_file
1184
1185
1186     def FakeSocket (sock, sslobj):
1187         warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1188                       "Use the result of ssl.wrap_socket() directly instead.",
1189                       DeprecationWarning, stacklevel=2)
1190         return sslobj
1191
1192
class HTTPException(Exception):
    """Common base class of the exception classes defined below.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1197
class NotConnected(HTTPException):
    """HTTPException subclass; by its name, signals a missing connection."""
1200
class InvalidURL(HTTPException):
    """HTTPException subclass; by its name, signals an unusable URL."""
1203
class UnknownProtocol(HTTPException):
    """HTTPException carrying a protocol version.

    `version' is stored as the .version attribute and as the only
    element of .args.
    """

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1208
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; by its name, signals an unsupported
    transfer encoding."""
1211
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; by its name, signals an unsupported file
    mode."""
1214
class IncompleteRead(HTTPException):
    """HTTPException carrying the data obtained before a read fell short.

    `partial' is stored as .partial and as the only element of .args;
    `expected', if given, is the number of additional bytes that were
    expected.  repr() and str() give the same summary text.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        return repr(self)
1228
class ImproperConnectionState(HTTPException):
    """Base class of the errors raised for calls made in the wrong
    connection state (CannotSendRequest, CannotSendHeader,
    ResponseNotReady)."""
1231
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1234
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request has been
    started."""
1237
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent or an
    earlier response is still pending."""
1240
class BadStatusLine(HTTPException):
    """HTTPException carrying a status line.

    The line is available as .line and as the only element of .args.
    A false value (such as an empty string) is replaced by its repr(),
    so that the stored text is never empty.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
1247
1248 # for backwards compatibility
1249 error = HTTPException
1250
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Presents `line' followed by the contents of `file' as one readable
    stream.  Attributes not defined here are looked up on `file'.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0           # index of the first unread character
        self._line_left = len(line)     # unread characters remaining in line

    def __getattr__(self, attr):
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object (instance attributes take the place of the methods).
        self._line_consumed = 1
        for name in ('read', 'readline', 'readlines'):
            setattr(self, name, getattr(self._file, name))

    def read(self, amt=None):
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left

        if amt is not None and amt <= self._line_left:
            # The request is satisfied by the saved line alone.
            start = self._line_offset
            stop = start + amt
            piece = self._line[start:stop]
            self._line_offset = stop
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return piece

        # Hand out the rest of the line, topped up from the file.
        tail = self._line[self._line_offset:]
        self._done()
        if amt is None:
            return tail + self._file.read()
        return tail + self._file.read(amt - len(tail))

    def readline(self):
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def readlines(self, size=None):
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        first = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            following = self._file.readlines()
        else:
            following = self._file.readlines(size)
        return first + following
1318
1319 def test():
1320     """Test this module.
1321
1322     A hodge podge of tests collected here, because they have too many
1323     external dependencies for the regular test suite.
1324     """
1325
1326     import sys
1327     import getopt
1328     opts, args = getopt.getopt(sys.argv[1:], 'd')
1329     dl = 0
1330     for o, a in opts:
1331         if o == '-d': dl = dl + 1
1332     host = 'www.python.org'
1333     selector = '/'
1334     if args[0:]: host = args[0]
1335     if args[1:]: selector = args[1]
1336     h = HTTP()
1337     h.set_debuglevel(dl)
1338     h.connect(host)
1339     h.putrequest('GET', selector)
1340     h.endheaders()
1341     status, reason, headers = h.getreply()
1342     print 'status =', status
1343     print 'reason =', reason
1344     print "read", len(h.getfile().read())
1345     print
1346     if headers:
1347         for header in headers.headers: print header.strip()
1348     print
1349
1350     # minimal test that code to extract host from url works
1351     class HTTP11(HTTP):
1352         _http_vsn = 11
1353         _http_vsn_str = 'HTTP/1.1'
1354
1355     h = HTTP11('www.python.org')
1356     h.putrequest('GET', 'http://www.python.org/~jeremy/')
1357     h.endheaders()
1358     h.getreply()
1359     h.close()
1360
1361     try:
1362         import ssl
1363     except ImportError:
1364         pass
1365     else:
1366
1367         for host, selector in (('sourceforge.net', '/projects/python'),
1368                                ):
1369             print "https://%s%s" % (host, selector)
1370             hs = HTTPS()
1371             hs.set_debuglevel(dl)
1372             hs.connect(host)
1373             hs.putrequest('GET', selector)
1374             hs.endheaders()
1375             status, reason, headers = hs.getreply()
1376             print 'status =', status
1377             print 'reason =', reason
1378             print "read", len(hs.getfile().read())
1379             print
1380             if headers:
1381                 for header in headers.headers: print header.strip()
1382             print
1383
1384 if __name__ == '__main__':
1385     test()