1 """HTTP/1.1 client library
3 <intro stuff goes here>
6 HTTPConnection objects go through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
20 | ( putheader() )* endheaders()
24 | response = getresponse()
26 Unread-response [Response-headers-read]
27 |\____________________
29 | response.read() | putrequest()
31 Idle Req-started-unread-response
34 response.read() | | ( putheader() )* endheaders()
36 Request-started Req-sent-unread-response
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
59 Logical State __state __response
60 ------------- ------- ----------
62 Request-started _CS_REQ_STARTED None
63 Request-sent _CS_REQ_SENT None
64 Unread-response _CS_IDLE <response_class>
65 Req-started-unread-response _CS_REQ_STARTED <response_class>
66 Req-sent-unread-response _CS_REQ_SENT <response_class>
74 from cStringIO
import StringIO
76 from StringIO
import StringIO
85 _CS_REQ_STARTED
= 'Request-started'
86 _CS_REQ_SENT
= 'Request-sent'
90 def __init__(self
, sock
, debuglevel
=0):
91 self
.fp
= sock
.makefile('rb', 0)
92 self
.debuglevel
= debuglevel
96 # from the Status-Line of the response
97 self
.version
= _UNKNOWN
# HTTP-Version
98 self
.status
= _UNKNOWN
# Status-Code
99 self
.reason
= _UNKNOWN
# Reason-Phrase
101 self
.chunked
= _UNKNOWN
# is "chunked" being used?
102 self
.chunk_left
= _UNKNOWN
# bytes left to read in current chunk
103 self
.length
= _UNKNOWN
# number of bytes left in response
104 self
.will_close
= _UNKNOWN
# conn will close at end of response
107 if self
.msg
is not None:
108 # we've already started reading the response
111 line
= self
.fp
.readline()
112 if self
.debuglevel
> 0:
113 print "reply:", repr(line
)
115 [version
, status
, reason
] = string
.split(line
, None, 2)
118 [version
, status
] = string
.split(line
, None, 1)
124 if version
[:5] != 'HTTP/':
126 raise BadStatusLine(line
)
128 self
.status
= status
= int(status
)
129 self
.reason
= string
.strip(reason
)
131 if version
== 'HTTP/1.0':
133 elif version
.startswith('HTTP/1.'):
134 self
.version
= 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
135 elif version
== 'HTTP/0.9':
138 raise UnknownProtocol(version
)
140 if self
.version
== 9:
141 self
.msg
= mimetools
.Message(StringIO())
144 self
.msg
= mimetools
.Message(self
.fp
, 0)
145 if self
.debuglevel
> 0:
146 for hdr
in self
.msg
.headers
:
147 print "header:", hdr
,
149 # don't let the msg keep an fp
152 # are we using the chunked-style of transfer encoding?
153 tr_enc
= self
.msg
.getheader('transfer-encoding')
155 if string
.lower(tr_enc
) != 'chunked':
156 raise UnknownTransferEncoding()
158 self
.chunk_left
= None
162 # will the connection close at the end of the response?
163 conn
= self
.msg
.getheader('connection')
165 conn
= string
.lower(conn
)
166 # a "Connection: close" will always close the connection. if we
167 # don't see that and this is not HTTP/1.1, then the connection will
168 # close unless we see a Keep-Alive header.
169 self
.will_close
= string
.find(conn
, 'close') != -1 or \
170 ( self
.version
!= 11 and \
171 not self
.msg
.getheader('keep-alive') )
173 # for HTTP/1.1, the connection will always remain open
174 # otherwise, it will remain open IFF we see a Keep-Alive header
175 self
.will_close
= self
.version
!= 11 and \
176 not self
.msg
.getheader('keep-alive')
178 # do we have a Content-Length?
179 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
180 length
= self
.msg
.getheader('content-length')
181 if length
and not self
.chunked
:
183 self
.length
= int(length
)
189 # does the body have a fixed length? (of zero)
190 if (status
== 204 or # No Content
191 status
== 304 or # Not Modified
192 100 <= status
< 200): # 1xx codes
195 # if the connection remains open, and we aren't using chunked, and
196 # a content-length was not provided, then assume that the connection
198 if not self
.will_close
and \
199 not self
.chunked
and \
209 # NOTE: it is possible that we will not ever call self.close(). This
210 # case occurs when will_close is TRUE, length is None, and we
211 # read up to the last byte, but NOT past it.
213 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
214 # called, meaning self.isclosed() is meaningful.
215 return self
.fp
is None
217 def read(self
, amt
=None):
222 chunk_left
= self
.chunk_left
225 if chunk_left
is None:
226 line
= self
.fp
.readline()
227 i
= string
.find(line
, ';')
229 line
= line
[:i
] # strip chunk-extensions
230 chunk_left
= string
.atoi(line
, 16)
234 value
= value
+ self
._safe
_read
(chunk_left
)
235 elif amt
< chunk_left
:
236 value
= value
+ self
._safe
_read
(amt
)
237 self
.chunk_left
= chunk_left
- amt
239 elif amt
== chunk_left
:
240 value
= value
+ self
._safe
_read
(amt
)
241 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
242 self
.chunk_left
= None
245 value
= value
+ self
._safe
_read
(chunk_left
)
246 amt
= amt
- chunk_left
248 # we read the whole chunk, get another
249 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
252 # read and discard trailer up to the CRLF terminator
253 ### note: we shouldn't have any trailers!
255 line
= self
.fp
.readline()
259 # we read everything; close the "file"
269 s
= self
._safe
_read
(self
.length
)
270 self
.close() # we read everything
273 if self
.length
is not None:
274 if amt
> self
.length
:
275 # clip the read to the "end of response"
277 self
.length
= self
.length
- amt
279 # we do not use _safe_read() here because this may be a .will_close
280 # connection, and the user is reading more bytes than will be provided
281 # (for example, reading in 1k chunks)
282 s
= self
.fp
.read(amt
)
286 def _safe_read(self
, amt
):
287 """Read the number of bytes requested, compensating for partial reads.
289 Normally, we have a blocking socket, but a read() can be interrupted
290 by a signal (resulting in a partial read).
292 Note that we cannot distinguish between EOF and an interrupt when zero
293 bytes have been read. IncompleteRead() will be raised in this
296 This function should be used when <amt> bytes "should" be present for
297 reading. If the bytes are truly not available (due to EOF), then the
298 IncompleteRead exception can be used to detect the problem.
302 chunk
= self
.fp
.read(amt
)
304 raise IncompleteRead(s
)
306 amt
= amt
- len(chunk
)
309 def getheader(self
, name
, default
=None):
311 raise ResponseNotReady()
312 return self
.msg
.getheader(name
, default
)
315 class HTTPConnection
:
318 _http_vsn_str
= 'HTTP/1.1'
320 response_class
= HTTPResponse
321 default_port
= HTTP_PORT
325 def __init__(self
, host
, port
=None):
327 self
.__response
= None
328 self
.__state
= _CS_IDLE
330 self
._set
_hostport
(host
, port
)
332 def _set_hostport(self
, host
, port
):
334 i
= string
.find(host
, ':')
336 port
= int(host
[i
+1:])
339 port
= self
.default_port
def set_debuglevel(self, level):
    """Set the debugging output level for this connection.

    `level' is stored on the instance as `debuglevel'.  Other methods in
    this file print diagnostic lines when that value is greater than zero.
    """
    self.debuglevel = level
347 """Connect to the host and port specified in __init__."""
348 self
.sock
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
349 if self
.debuglevel
> 0:
350 print "connect: (%s, %s)" % (self
.host
, self
.port
)
351 self
.sock
.connect((self
.host
, self
.port
))
354 """Close the connection to the HTTP server."""
356 self
.sock
.close() # close it manually... there may be other refs
359 self
.__response
.close()
360 self
.__response
= None
361 self
.__state
= _CS_IDLE
364 """Send `str' to the server."""
365 if self
.sock
is None:
371 # send the data to the server. if we get a broken pipe, then close
372 # the socket. we want to reconnect when somebody tries to send again.
374 # NOTE: we DO propagate the error, though, because we cannot simply
375 # ignore the error... the caller will know if they can retry.
376 if self
.debuglevel
> 0:
377 print "send:", repr(str)
380 except socket
.error
, v
:
381 if v
[0] == 32: # Broken pipe
385 def putrequest(self
, method
, url
):
386 """Send a request to the server.
388 `method' specifies an HTTP request method, e.g. 'GET'.
389 `url' specifies the object being requested, e.g. '/index.html'.
392 # check if a prior response has been completed
393 if self
.__response
and self
.__response
.isclosed():
394 self
.__response
= None
397 # in certain cases, we cannot issue another request on this connection.
399 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
400 # 2) a response to a previous request has signalled that it is going
401 # to close the connection upon completion.
402 # 3) the headers for the previous response have not been read, thus
403 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
405 # if there is no prior response, then we can request at will.
407 # if point (2) is true, then we will have passed the socket to the
408 # response (effectively meaning, "there is no prior response"), and
409 # will open a new one when a new request is made.
411 # Note: if a prior response exists, then we *can* start a new request.
412 # We are not allowed to begin fetching the response to this new
413 # request, however, until that prior response is complete.
415 if self
.__state
== _CS_IDLE
:
416 self
.__state
= _CS_REQ_STARTED
418 raise CannotSendRequest()
422 str = '%s %s %s\r\n' % (method
, url
, self
._http
_vsn
_str
)
426 except socket
.error
, v
:
427 # trap 'Broken pipe' if we're allowed to automatically reconnect
428 if v
[0] != 32 or not self
.auto_open
:
430 # try one more time (the socket was closed; this will reopen)
433 if self
._http
_vsn
== 11:
434 # Issue some standard headers for better HTTP/1.1 compliance
436 # this header is issued *only* for HTTP/1.1 connections. more
437 # specifically, this means it is only issued when the client uses
438 # the new HTTPConnection() class. backwards-compat clients will
439 # be using HTTP/1.0 and those clients may be issuing this header
440 # themselves. we should NOT issue it twice; some web servers (such
441 # as Apache) barf when they see two Host: headers
442 self
.putheader('Host', self
.host
)
444 # note: we are assuming that clients will not attempt to set these
445 # headers since *this* library must deal with the
446 # consequences. this also means that when the supporting
447 # libraries are updated to recognize other forms, then this
448 # code should be changed (removed or updated).
450 # we only want a Content-Encoding of "identity" since we don't
451 # support encodings such as x-gzip or x-deflate.
452 self
.putheader('Accept-Encoding', 'identity')
454 # we can accept "chunked" Transfer-Encodings, but no others
455 # NOTE: no TE header implies *only* "chunked"
456 #self.putheader('TE', 'chunked')
458 # if TE is supplied in the header, then it must appear in a
460 #self.putheader('Connection', 'TE')
463 # For HTTP/1.0, the server will assume "not chunked"
466 def putheader(self
, header
, value
):
467 """Send a request header line to the server.
469 For example: h.putheader('Accept', 'text/html')
471 if self
.__state
!= _CS_REQ_STARTED
:
472 raise CannotSendHeader()
474 str = '%s: %s\r\n' % (header
, value
)
477 def endheaders(self
):
478 """Indicate that the last header line has been sent to the server."""
480 if self
.__state
== _CS_REQ_STARTED
:
481 self
.__state
= _CS_REQ_SENT
483 raise CannotSendHeader()
487 def request(self
, method
, url
, body
=None, headers
={}):
488 """Send a complete request to the server."""
491 self
._send
_request
(method
, url
, body
, headers
)
492 except socket
.error
, v
:
493 # trap 'Broken pipe' if we're allowed to automatically reconnect
494 if v
[0] != 32 or not self
.auto_open
:
497 self
._send
_request
(method
, url
, body
, headers
)
499 def _send_request(self
, method
, url
, body
, headers
):
500 self
.putrequest(method
, url
)
503 self
.putheader('Content-Length', str(len(body
)))
504 for hdr
, value
in headers
.items():
505 self
.putheader(hdr
, value
)
511 def getresponse(self
):
512 "Get the response from the server."
514 # check if a prior response has been completed
515 if self
.__response
and self
.__response
.isclosed():
516 self
.__response
= None
519 # if a prior response exists, then it must be completed (otherwise, we
520 # cannot read this response's header to determine the connection-close
523 # note: if a prior response existed, but was connection-close, then the
524 # socket and response were made independent of this HTTPConnection
525 # object since a new request requires that we open a whole new
528 # this means the prior response had one of two states:
529 # 1) will_close: this connection was reset and the prior socket and
530 # response operate independently
531 # 2) persistent: the response was retained and we await its
532 # isclosed() status to become true.
534 if self
.__state
!= _CS_REQ_SENT
or self
.__response
:
535 raise ResponseNotReady()
537 if self
.debuglevel
> 0:
538 response
= self
.response_class(self
.sock
, self
.debuglevel
)
540 response
= self
.response_class(self
.sock
)
543 self
.__state
= _CS_IDLE
545 if response
.will_close
:
546 # this effectively passes the connection to the response
549 # remember this, so we can tell when it is complete
550 self
.__response
= response
556 def __init__(self
, sock
, ssl
):
560 def makefile(self
, mode
, bufsize
=None):
561 """Return a readable file-like object with data from socket.
563 This method offers only partial support for the makefile
564 interface of a real socket. It only supports modes 'r' and
565 'rb' and the bufsize argument is ignored.
567 The returned object contains *all* of the file data
569 if mode
!= 'r' and mode
!= 'rb':
570 raise UnimplementedFileMode()
575 msgbuf
= msgbuf
+ self
.__ssl
.read()
576 except socket
.sslerror
, msg
:
578 return StringIO(msgbuf
)
def send(self, stuff, flags = 0):
    """Write `stuff' through the SSL object.

    Returns whatever the SSL object's write() returns.  `flags' is
    accepted but not used.
    """
    result = self.__ssl.write(stuff)
    return result
def recv(self, len = 1024, flags = 0):
    """Read from the SSL object, passing `len' as the size argument.

    Returns whatever the SSL object's read() returns.  `flags' is
    accepted but not used.
    """
    # NOTE: `len' shadows the builtin, but it is part of the public
    # signature and is therefore kept as-is.
    data = self.__ssl.read(len)
    return data
def __getattr__(self, attr):
    """Look up `attr' on the wrapped socket object and return it.

    Python calls this hook only for attributes that normal lookup on
    this object did not find.
    """
    wrapped = self.__sock
    return getattr(wrapped, attr)
590 class HTTPSConnection(HTTPConnection
):
591 "This class allows communication via SSL."
593 default_port
= HTTPS_PORT
595 def __init__(self
, host
, port
=None, **x509
):
598 keys
.remove('key_file')
602 keys
.remove('cert_file')
606 raise IllegalKeywordArgument()
607 HTTPConnection
.__init
__(self
, host
, port
)
608 self
.key_file
= x509
.get('key_file')
609 self
.cert_file
= x509
.get('cert_file')
612 "Connect to a host on a given (SSL) port."
614 sock
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
615 sock
.connect((self
.host
, self
.port
))
617 if hasattr(sock
, "_sock"):
618 realsock
= sock
._sock
619 ssl
= socket
.ssl(realsock
, self
.key_file
, self
.cert_file
)
620 self
.sock
= FakeSocket(sock
, ssl
)
624 "Compatibility class with httplib.py from 1.5."
627 _http_vsn_str
= 'HTTP/1.0'
631 _connection_class
= HTTPConnection
633 def __init__(self
, host
='', port
=None, **x509
):
634 "Provide a default host, since the superclass requires one."
636 # some joker passed 0 explicitly, meaning default port
640 # Note that we may pass an empty string as the host; this will throw
641 # an error when we attempt to connect. Presumably, the client code
642 # will call connect before then, with a proper host.
643 self
._conn
= self
._connection
_class
(host
, port
)
644 # set up delegation to flesh out interface
645 self
.send
= self
._conn
.send
646 self
.putrequest
= self
._conn
.putrequest
647 self
.endheaders
= self
._conn
.endheaders
648 self
._conn
._http
_vsn
= self
._http
_vsn
649 self
._conn
._http
_vsn
_str
= self
._http
_vsn
_str
651 # we never actually use these for anything, but we keep them here for
652 # compatibility with post-1.5.2 CVS.
653 self
.key_file
= x509
.get('key_file')
654 self
.cert_file
= x509
.get('cert_file')
658 def connect(self
, host
=None, port
=None):
659 "Accept arguments to set the host/port, since the superclass doesn't."
662 self
._conn
._set
_hostport
(host
, port
)
def set_debuglevel(self, debuglevel):
    """Forward the requested debug level to the wrapped connection object."""
    conn = self._conn
    conn.set_debuglevel(debuglevel)
669 "Provide a getfile, since the superclass' does not use this concept."
def putheader(self, header, *values):
    """Send a header that may carry several values.

    The superclass allows only one value argument, so all `values' are
    folded into a single value, joined with CRLF + TAB, and handed to
    the wrapped connection's putheader().
    """
    # str.join replaces the deprecated string.joinfields(); the result is
    # identical for string values.
    folded = '\r\n\t'.join(values)
    self._conn.putheader(header, folded)
678 """Compat definition since superclass does not define it.
680 Returns a tuple consisting of:
681 - server status code (e.g. '200' if all goes well)
682 - server "reason" corresponding to status code
683 - any RFC822 headers in the response from the server
686 response
= self
._conn
.getresponse()
687 except BadStatusLine
, e
:
688 ### hmm. if getresponse() ever closes the socket on a bad request,
689 ### then we are going to have problems with self.sock
691 ### should we keep this behavior? do people use it?
692 # keep the socket open (as a file), and return it
693 self
.file = self
._conn
.sock
.makefile('rb', 0)
695 # close our socket -- we want to restart after any protocol error
699 return -1, e
.line
, None
701 self
.headers
= response
.msg
702 self
.file = response
.fp
703 return response
.status
, response
.reason
, response
.msg
708 # note that self.file == response.fp, which gets closed by the
709 # superclass. just clear the object ref here.
710 ### hmm. messy. if status==-1, then self.file is owned by us.
711 ### well... we aren't explicitly closing, but losing this ref will
715 if hasattr(socket
, 'ssl'):
717 """Compatibility with 1.5 httplib interface
719 Python 1.5.2 did not have an HTTPS class, but it defined an
720 interface for sending http requests that is also useful for
724 _connection_class
= HTTPSConnection
class HTTPException(Exception):
    """Base class for every exception defined in this module."""

class NotConnected(HTTPException):
    """Connection-related error; a direct subclass of HTTPException."""

class UnknownProtocol(HTTPException):
    """Raised when the status line carries an unrecognized HTTP version.

    The offending version string is available as `version'.
    """
    def __init__(self, version):
        # populate args so str(e) / repr(e) show the offending value
        self.args = (version,)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    """Raised when a Transfer-Encoding other than "chunked" is seen."""

class IllegalKeywordArgument(HTTPException):
    """Raised from HTTPSConnection.__init__ while checking its keywords."""

class UnimplementedFileMode(HTTPException):
    """Raised by makefile() for a mode other than 'r' or 'rb'."""

class IncompleteRead(HTTPException):
    """Raised by _safe_read() when the requested bytes are not available.

    The data passed in by the raiser is available as `partial'.
    """
    def __init__(self, partial):
        # populate args so str(e) / repr(e) show the partial data
        self.args = (partial,)
        self.partial = partial

class ImproperConnectionState(HTTPException):
    """Base class for errors about calling methods in the wrong state."""

class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when a new request may not be started."""

class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader()/endheaders() when no request is in progress."""

class ResponseNotReady(ImproperConnectionState):
    """Raised when a response is asked for before it can be read."""

class BadStatusLine(HTTPException):
    """Raised when the status line does not start with 'HTTP/'.

    The offending line is available as `line'.
    """
    def __init__(self, line):
        # populate args so str(e) / repr(e) show the offending line
        self.args = (line,)
        self.line = line

# for backwards compatibility
error = HTTPException
771 # snarfed from httplib.py for now...
776 The test consists of retrieving and displaying the Python
777 home page, along with the error code and error string returned
778 by the www.python.org server.
783 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'd')
786 if o
== '-d': dl
= dl
+ 1
787 host
= 'www.python.org'
789 if args
[0:]: host
= args
[0]
790 if args
[1:]: selector
= args
[1]
794 h
.putrequest('GET', selector
)
796 status
, reason
, headers
= h
.getreply()
797 print 'status =', status
798 print 'reason =', reason
801 for header
in headers
.headers
: print string
.strip(header
)
803 print h
.getfile().read()
805 if hasattr(socket
, 'ssl'):
806 host
= 'sourceforge.net'
809 hs
.putrequest('GET', selector
)
811 status
, reason
, headers
= hs
.getreply()
812 print 'status =', status
813 print 'reason =', reason
816 for header
in headers
.headers
: print string
.strip(header
)
818 print hs
.getfile().read()
821 if __name__
== '__main__':