1 """HTTP/1.1 client library
3 <intro stuff goes here>
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
20 | ( putheader() )* endheaders()
24 | response = getresponse()
26 Unread-response [Response-headers-read]
27 |\____________________
29 | response.read() | putrequest()
31 Idle Req-started-unread-response
34 response.read() | | ( putheader() )* endheaders()
36 Request-started Req-sent-unread-response
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
59 Logical State __state __response
60 ------------- ------- ----------
62 Request-started _CS_REQ_STARTED None
63 Request-sent _CS_REQ_SENT None
64 Unread-response _CS_IDLE <response_class>
65 Req-started-unread-response _CS_REQ_STARTED <response_class>
66 Req-sent-unread-response _CS_REQ_SENT <response_class>
73 from cStringIO
import StringIO
75 from StringIO
import StringIO
77 __all__
= ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
78 "HTTPException", "NotConnected", "UnknownProtocol",
79 "UnknownTransferEncoding", "IllegalKeywordArgument",
80 "UnimplementedFileMode", "IncompleteRead",
81 "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader",
82 "ResponseNotReady", "BadStatusLine", "error"]
91 _CS_REQ_STARTED
= 'Request-started'
92 _CS_REQ_SENT
= 'Request-sent'
96 def __init__(self
, sock
, debuglevel
=0):
97 self
.fp
= sock
.makefile('rb', 0)
98 self
.debuglevel
= debuglevel
102 # from the Status-Line of the response
103 self
.version
= _UNKNOWN
# HTTP-Version
104 self
.status
= _UNKNOWN
# Status-Code
105 self
.reason
= _UNKNOWN
# Reason-Phrase
107 self
.chunked
= _UNKNOWN
# is "chunked" being used?
108 self
.chunk_left
= _UNKNOWN
# bytes left to read in current chunk
109 self
.length
= _UNKNOWN
# number of bytes left in response
110 self
.will_close
= _UNKNOWN
# conn will close at end of response
113 if self
.msg
is not None:
114 # we've already started reading the response
117 line
= self
.fp
.readline()
118 if self
.debuglevel
> 0:
119 print "reply:", repr(line
)
121 [version
, status
, reason
] = line
.split(None, 2)
124 [version
, status
] = line
.split(None, 1)
130 if version
[:5] != 'HTTP/':
132 raise BadStatusLine(line
)
134 # The status code is a three-digit number
136 self
.status
= status
= int(status
)
137 if status
< 100 or status
> 999:
138 raise BadStatusLine(line
)
140 raise BadStatusLine(line
)
141 self
.reason
= reason
.strip()
143 if version
== 'HTTP/1.0':
145 elif version
.startswith('HTTP/1.'):
146 self
.version
= 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
147 elif version
== 'HTTP/0.9':
150 raise UnknownProtocol(version
)
152 if self
.version
== 9:
153 self
.msg
= mimetools
.Message(StringIO())
156 self
.msg
= mimetools
.Message(self
.fp
, 0)
157 if self
.debuglevel
> 0:
158 for hdr
in self
.msg
.headers
:
159 print "header:", hdr
,
161 # don't let the msg keep an fp
164 # are we using the chunked-style of transfer encoding?
165 tr_enc
= self
.msg
.getheader('transfer-encoding')
167 if tr_enc
.lower() != 'chunked':
168 raise UnknownTransferEncoding()
170 self
.chunk_left
= None
174 # will the connection close at the end of the response?
175 conn
= self
.msg
.getheader('connection')
178 # a "Connection: close" will always close the connection. if we
179 # don't see that and this is not HTTP/1.1, then the connection will
180 # close unless we see a Keep-Alive header.
181 self
.will_close
= conn
.find('close') != -1 or \
182 ( self
.version
!= 11 and \
183 not self
.msg
.getheader('keep-alive') )
185 # for HTTP/1.1, the connection will always remain open
186 # otherwise, it will remain open IFF we see a Keep-Alive header
187 self
.will_close
= self
.version
!= 11 and \
188 not self
.msg
.getheader('keep-alive')
190 # do we have a Content-Length?
191 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
192 length
= self
.msg
.getheader('content-length')
193 if length
and not self
.chunked
:
195 self
.length
= int(length
)
201 # does the body have a fixed length? (of zero)
202 if (status
== 204 or # No Content
203 status
== 304 or # Not Modified
204 100 <= status
< 200): # 1xx codes
207 # if the connection remains open, and we aren't using chunked, and
208 # a content-length was not provided, then assume that the connection
210 if not self
.will_close
and \
211 not self
.chunked
and \
221 # NOTE: it is possible that we will not ever call self.close(). This
222 # case occurs when will_close is TRUE, length is None, and we
223 # read up to the last byte, but NOT past it.
225 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
226 # called, meaning self.isclosed() is meaningful.
227 return self
.fp
is None
229 def read(self
, amt
=None):
234 chunk_left
= self
.chunk_left
237 if chunk_left
is None:
238 line
= self
.fp
.readline()
241 line
= line
[:i
] # strip chunk-extensions
242 chunk_left
= int(line
, 16)
246 value
= value
+ self
._safe
_read
(chunk_left
)
247 elif amt
< chunk_left
:
248 value
= value
+ self
._safe
_read
(amt
)
249 self
.chunk_left
= chunk_left
- amt
251 elif amt
== chunk_left
:
252 value
= value
+ self
._safe
_read
(amt
)
253 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
254 self
.chunk_left
= None
257 value
= value
+ self
._safe
_read
(chunk_left
)
258 amt
= amt
- chunk_left
260 # we read the whole chunk, get another
261 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
264 # read and discard trailer up to the CRLF terminator
265 ### note: we shouldn't have any trailers!
267 line
= self
.fp
.readline()
271 # we read everything; close the "file"
281 s
= self
._safe
_read
(self
.length
)
282 self
.close() # we read everything
285 if self
.length
is not None:
286 if amt
> self
.length
:
287 # clip the read to the "end of response"
289 self
.length
= self
.length
- amt
291 # we do not use _safe_read() here because this may be a .will_close
292 # connection, and the user is reading more bytes than will be provided
293 # (for example, reading in 1k chunks)
294 s
= self
.fp
.read(amt
)
298 def _safe_read(self
, amt
):
299 """Read the number of bytes requested, compensating for partial reads.
301 Normally, we have a blocking socket, but a read() can be interrupted
302 by a signal (resulting in a partial read).
304 Note that we cannot distinguish between EOF and an interrupt when zero
305 bytes have been read. IncompleteRead() will be raised in this
308 This function should be used when <amt> bytes "should" be present for
309 reading. If the bytes are truly not available (due to EOF), then the
310 IncompleteRead exception can be used to detect the problem.
314 chunk
= self
.fp
.read(amt
)
316 raise IncompleteRead(s
)
318 amt
= amt
- len(chunk
)
321 def getheader(self
, name
, default
=None):
323 raise ResponseNotReady()
324 return self
.msg
.getheader(name
, default
)
327 class HTTPConnection
:
330 _http_vsn_str
= 'HTTP/1.1'
332 response_class
= HTTPResponse
333 default_port
= HTTP_PORT
337 def __init__(self
, host
, port
=None):
339 self
.__response
= None
340 self
.__state
= _CS_IDLE
342 self
._set
_hostport
(host
, port
)
344 def _set_hostport(self
, host
, port
):
348 port
= int(host
[i
+1:])
351 port
= self
.default_port
def set_debuglevel(self, level):
    """Store `level` as this object's debug level.

    The value is kept on `self.debuglevel`; other code in this file
    prints diagnostic output when that attribute is greater than 0.
    """
    self.debuglevel = level
359 """Connect to the host and port specified in __init__."""
360 msg
= "getaddrinfo returns an empty list"
361 for res
in socket
.getaddrinfo(self
.host
, self
.port
, 0, socket
.SOCK_STREAM
):
362 af
, socktype
, proto
, canonname
, sa
= res
364 self
.sock
= socket
.socket(af
, socktype
, proto
)
365 if self
.debuglevel
> 0:
366 print "connect: (%s, %s)" % (self
.host
, self
.port
)
367 self
.sock
.connect(sa
)
368 except socket
.error
, msg
:
369 if self
.debuglevel
> 0:
370 print 'connect fail:', (self
.host
, self
.port
)
376 raise socket
.error
, msg
379 """Close the connection to the HTTP server."""
381 self
.sock
.close() # close it manually... there may be other refs
384 self
.__response
.close()
385 self
.__response
= None
386 self
.__state
= _CS_IDLE
389 """Send `str' to the server."""
390 if self
.sock
is None:
396 # send the data to the server. if we get a broken pipe, then close
397 # the socket. we want to reconnect when somebody tries to send again.
399 # NOTE: we DO propagate the error, though, because we cannot simply
400 # ignore the error... the caller will know if they can retry.
401 if self
.debuglevel
> 0:
402 print "send:", repr(str)
405 except socket
.error
, v
:
406 if v
[0] == 32: # Broken pipe
410 def putrequest(self
, method
, url
):
411 """Send a request to the server.
413 `method' specifies an HTTP request method, e.g. 'GET'.
414 `url' specifies the object being requested, e.g. '/index.html'.
417 # check if a prior response has been completed
418 if self
.__response
and self
.__response
.isclosed():
419 self
.__response
= None
422 # in certain cases, we cannot issue another request on this connection.
424 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
425 # 2) a response to a previous request has signalled that it is going
426 # to close the connection upon completion.
427 # 3) the headers for the previous response have not been read, thus
428 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
430 # if there is no prior response, then we can request at will.
432 # if point (2) is true, then we will have passed the socket to the
433 # response (effectively meaning, "there is no prior response"), and
434 # will open a new one when a new request is made.
436 # Note: if a prior response exists, then we *can* start a new request.
437 # We are not allowed to begin fetching the response to this new
438 # request, however, until that prior response is complete.
440 if self
.__state
== _CS_IDLE
:
441 self
.__state
= _CS_REQ_STARTED
443 raise CannotSendRequest()
447 str = '%s %s %s\r\n' % (method
, url
, self
._http
_vsn
_str
)
451 except socket
.error
, v
:
452 # trap 'Broken pipe' if we're allowed to automatically reconnect
453 if v
[0] != 32 or not self
.auto_open
:
455 # try one more time (the socket was closed; this will reopen)
458 if self
._http
_vsn
== 11:
459 # Issue some standard headers for better HTTP/1.1 compliance
461 # this header is issued *only* for HTTP/1.1 connections. more
462 # specifically, this means it is only issued when the client uses
463 # the new HTTPConnection() class. backwards-compat clients will
464 # be using HTTP/1.0 and those clients may be issuing this header
465 # themselves. we should NOT issue it twice; some web servers (such
466 # as Apache) barf when they see two Host: headers
468 # if we need a non-standard port, include it in the header
469 if self
.port
== HTTP_PORT
:
470 self
.putheader('Host', self
.host
)
472 self
.putheader('Host', "%s:%s" % (self
.host
, self
.port
))
474 # note: we are assuming that clients will not attempt to set these
475 # headers since *this* library must deal with the
476 # consequences. this also means that when the supporting
477 # libraries are updated to recognize other forms, then this
478 # code should be changed (removed or updated).
480 # we only want a Content-Encoding of "identity" since we don't
481 # support encodings such as x-gzip or x-deflate.
482 self
.putheader('Accept-Encoding', 'identity')
484 # we can accept "chunked" Transfer-Encodings, but no others
485 # NOTE: no TE header implies *only* "chunked"
486 #self.putheader('TE', 'chunked')
488 # if TE is supplied in the header, then it must appear in a
490 #self.putheader('Connection', 'TE')
493 # For HTTP/1.0, the server will assume "not chunked"
496 def putheader(self
, header
, value
):
497 """Send a request header line to the server.
499 For example: h.putheader('Accept', 'text/html')
501 if self
.__state
!= _CS_REQ_STARTED
:
502 raise CannotSendHeader()
504 str = '%s: %s\r\n' % (header
, value
)
507 def endheaders(self
):
508 """Indicate that the last header line has been sent to the server."""
510 if self
.__state
== _CS_REQ_STARTED
:
511 self
.__state
= _CS_REQ_SENT
513 raise CannotSendHeader()
517 def request(self
, method
, url
, body
=None, headers
={}):
518 """Send a complete request to the server."""
521 self
._send
_request
(method
, url
, body
, headers
)
522 except socket
.error
, v
:
523 # trap 'Broken pipe' if we're allowed to automatically reconnect
524 if v
[0] != 32 or not self
.auto_open
:
527 self
._send
_request
(method
, url
, body
, headers
)
529 def _send_request(self
, method
, url
, body
, headers
):
530 self
.putrequest(method
, url
)
533 self
.putheader('Content-Length', str(len(body
)))
534 for hdr
, value
in headers
.items():
535 self
.putheader(hdr
, value
)
541 def getresponse(self
):
542 "Get the response from the server."
544 # check if a prior response has been completed
545 if self
.__response
and self
.__response
.isclosed():
546 self
.__response
= None
549 # if a prior response exists, then it must be completed (otherwise, we
550 # cannot read this response's header to determine the connection-close
553 # note: if a prior response existed, but was connection-close, then the
554 # socket and response were made independent of this HTTPConnection
555 # object since a new request requires that we open a whole new
558 # this means the prior response had one of two states:
559 # 1) will_close: this connection was reset and the prior socket and
560 # response operate independently
561 # 2) persistent: the response was retained and we await its
562 # isclosed() status to become true.
564 if self
.__state
!= _CS_REQ_SENT
or self
.__response
:
565 raise ResponseNotReady()
567 if self
.debuglevel
> 0:
568 response
= self
.response_class(self
.sock
, self
.debuglevel
)
570 response
= self
.response_class(self
.sock
)
573 self
.__state
= _CS_IDLE
575 if response
.will_close
:
576 # this effectively passes the connection to the response
579 # remember this, so we can tell when it is complete
580 self
.__response
= response
586 def __init__(self
, sock
, ssl
):
590 def makefile(self
, mode
, bufsize
=None):
591 """Return a readable file-like object with data from socket.
593 This method offers only partial support for the makefile
594 interface of a real socket. It only supports modes 'r' and
595 'rb' and the bufsize argument is ignored.
597 The returned object contains *all* of the file data
599 if mode
!= 'r' and mode
!= 'rb':
600 raise UnimplementedFileMode()
605 buf
= self
.__ssl
.read()
606 except socket
.sslerror
, msg
:
611 return StringIO("".join(msgbuf
))
def send(self, stuff, flags = 0):
    """Hand `stuff` to the wrapped SSL object's write() and return its result.

    `flags` is accepted so the signature resembles a socket's send(),
    but it is not used.
    """
    ssl_write = self.__ssl.write
    return ssl_write(stuff)
def recv(self, len = 1024, flags = 0):
    """Call read(len) on the wrapped SSL object and return what it gives back.

    `len` defaults to 1024.  `flags` is accepted so the signature
    resembles a socket's recv(), but it is not used.
    """
    ssl_read = self.__ssl.read
    return ssl_read(len)
619 def __getattr__(self
, attr
):
620 return getattr(self
.__sock
, attr
)
623 class HTTPSConnection(HTTPConnection
):
624 "This class allows communication via SSL."
626 default_port
= HTTPS_PORT
628 def __init__(self
, host
, port
=None, **x509
):
631 keys
.remove('key_file')
635 keys
.remove('cert_file')
639 raise IllegalKeywordArgument()
640 HTTPConnection
.__init
__(self
, host
, port
)
641 self
.key_file
= x509
.get('key_file')
642 self
.cert_file
= x509
.get('cert_file')
645 "Connect to a host on a given (SSL) port."
647 sock
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
648 sock
.connect((self
.host
, self
.port
))
650 if hasattr(sock
, "_sock"):
651 realsock
= sock
._sock
652 ssl
= socket
.ssl(realsock
, self
.key_file
, self
.cert_file
)
653 self
.sock
= FakeSocket(sock
, ssl
)
657 "Compatibility class with httplib.py from 1.5."
660 _http_vsn_str
= 'HTTP/1.0'
664 _connection_class
= HTTPConnection
666 def __init__(self
, host
='', port
=None):
667 "Provide a default host, since the superclass requires one."
669 # some joker passed 0 explicitly, meaning default port
673 # Note that we may pass an empty string as the host; this will throw
674 # an error when we attempt to connect. Presumably, the client code
675 # will call connect before then, with a proper host.
676 self
._setup
(self
._connection
_class
(host
, port
))
678 def _setup(self
, conn
):
681 # set up delegation to flesh out interface
682 self
.send
= conn
.send
683 self
.putrequest
= conn
.putrequest
684 self
.endheaders
= conn
.endheaders
685 self
.set_debuglevel
= conn
.set_debuglevel
687 conn
._http
_vsn
= self
._http
_vsn
688 conn
._http
_vsn
_str
= self
._http
_vsn
_str
692 def connect(self
, host
=None, port
=None):
693 "Accept arguments to set the host/port, since the superclass doesn't."
696 self
._conn
._set
_hostport
(host
, port
)
700 "Provide a getfile, since the superclass' does not use this concept."
def putheader(self, header, *values):
    """Send one header built from any number of value arguments.

    The wrapped connection's putheader() takes a single value, so the
    given values are joined with CRLF + TAB into one string before
    being passed on.
    """
    folded = '\r\n\t'.join(values)
    self._conn.putheader(header, folded)
708 """Compat definition since superclass does not define it.
710 Returns a tuple consisting of:
711 - server status code (e.g. '200' if all goes well)
712 - server "reason" corresponding to status code
713 - any RFC822 headers in the response from the server
716 response
= self
._conn
.getresponse()
717 except BadStatusLine
, e
:
718 ### hmm. if getresponse() ever closes the socket on a bad request,
719 ### then we are going to have problems with self.sock
721 ### should we keep this behavior? do people use it?
722 # keep the socket open (as a file), and return it
723 self
.file = self
._conn
.sock
.makefile('rb', 0)
725 # close our socket -- we want to restart after any protocol error
729 return -1, e
.line
, None
731 self
.headers
= response
.msg
732 self
.file = response
.fp
733 return response
.status
, response
.reason
, response
.msg
738 # note that self.file == response.fp, which gets closed by the
739 # superclass. just clear the object ref here.
740 ### hmm. messy. if status==-1, then self.file is owned by us.
741 ### well... we aren't explicitly closing, but losing this ref will
745 if hasattr(socket
, 'ssl'):
747 """Compatibility with 1.5 httplib interface
749 Python 1.5.2 did not have an HTTPS class, but it defined an
750 interface for sending http requests that is also useful for
754 _connection_class
= HTTPSConnection
756 def __init__(self
, host
='', port
=None, **x509
):
757 # provide a default host, pass the X509 cert info
759 # urf. compensate for bad input.
762 self
._setup
(self
._connection
_class
(host
, port
, **x509
))
764 # we never actually use these for anything, but we keep them
765 # here for compatibility with post-1.5.2 CVS.
766 self
.key_file
= x509
.get('key_file')
767 self
.cert_file
= x509
.get('cert_file')
770 class HTTPException(Exception):
773 class NotConnected(HTTPException
):
class UnknownProtocol(HTTPException):
    """Raised with the protocol version string that was not recognised.

    The offending value is available as the `version` attribute.
    """

    def __init__(self, version):
        # The base initializer is not called here, so set args explicitly;
        # otherwise the exception may carry no message when printed.
        self.args = (version,)
        self.version = version
780 class UnknownTransferEncoding(HTTPException
):
783 class IllegalKeywordArgument(HTTPException
):
786 class UnimplementedFileMode(HTTPException
):
class IncompleteRead(HTTPException):
    """Raised when a read could not deliver all the bytes that were expected.

    Whatever the raiser passed in is available as the `partial` attribute.
    """

    def __init__(self, partial):
        # The base initializer is not called here, so set args explicitly;
        # otherwise the exception may carry no message when printed.
        self.args = (partial,)
        self.partial = partial
793 class ImproperConnectionState(HTTPException
):
796 class CannotSendRequest(ImproperConnectionState
):
799 class CannotSendHeader(ImproperConnectionState
):
802 class ResponseNotReady(ImproperConnectionState
):
805 class BadStatusLine(HTTPException
):
806 def __init__(self
, line
):
809 # for backwards compatibility
810 error
= HTTPException
814 # snarfed from httplib.py for now...
819 The test consists of retrieving and displaying the Python
820 home page, along with the error code and error string returned
821 by the www.python.org server.
826 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'd')
829 if o
== '-d': dl
= dl
+ 1
830 host
= 'www.python.org'
832 if args
[0:]: host
= args
[0]
833 if args
[1:]: selector
= args
[1]
837 h
.putrequest('GET', selector
)
839 status
, reason
, headers
= h
.getreply()
840 print 'status =', status
841 print 'reason =', reason
844 for header
in headers
.headers
: print header
.strip()
846 print h
.getfile().read()
848 if hasattr(socket
, 'ssl'):
849 host
= 'sourceforge.net'
850 selector
= '/projects/python'
853 hs
.putrequest('GET', selector
)
855 status
, reason
, headers
= hs
.getreply()
856 print 'status =', status
857 print 'reason =', reason
860 for header
in headers
.headers
: print header
.strip()
862 print hs
.getfile().read()
865 if __name__
== '__main__':