1 """HTTP/1.1 client library
3 <intro stuff goes here>
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
20 | ( putheader() )* endheaders()
24 | response = getresponse()
26 Unread-response [Response-headers-read]
27 |\____________________
29 | response.read() | putrequest()
31 Idle Req-started-unread-response
34 response.read() | | ( putheader() )* endheaders()
36 Request-started Req-sent-unread-response
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
59 Logical State __state __response
60 ------------- ------- ----------
62 Request-started _CS_REQ_STARTED None
63 Request-sent _CS_REQ_SENT None
64 Unread-response _CS_IDLE <response_class>
65 Req-started-unread-response _CS_REQ_STARTED <response_class>
66 Req-sent-unread-response _CS_REQ_SENT <response_class>
74 from cStringIO
import StringIO
76 from StringIO
import StringIO
78 __all__
= ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
79 "HTTPException", "NotConnected", "UnknownProtocol",
80 "UnknownTransferEncoding", "IllegalKeywordArgument",
81 "UnimplementedFileMode", "IncompleteRead",
82 "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader",
83 "ResponseNotReady", "BadStatusLine", "error"]
92 _CS_REQ_STARTED
= 'Request-started'
93 _CS_REQ_SENT
= 'Request-sent'
97 def __init__(self
, sock
, debuglevel
=0):
98 self
.fp
= sock
.makefile('rb', 0)
99 self
.debuglevel
= debuglevel
103 # from the Status-Line of the response
104 self
.version
= _UNKNOWN
# HTTP-Version
105 self
.status
= _UNKNOWN
# Status-Code
106 self
.reason
= _UNKNOWN
# Reason-Phrase
108 self
.chunked
= _UNKNOWN
# is "chunked" being used?
109 self
.chunk_left
= _UNKNOWN
# bytes left to read in current chunk
110 self
.length
= _UNKNOWN
# number of bytes left in response
111 self
.will_close
= _UNKNOWN
# conn will close at end of response
114 if self
.msg
is not None:
115 # we've already started reading the response
118 line
= self
.fp
.readline()
119 if self
.debuglevel
> 0:
120 print "reply:", repr(line
)
122 [version
, status
, reason
] = line
.split(None, 2)
125 [version
, status
] = line
.split(None, 1)
131 if version
[:5] != 'HTTP/':
133 raise BadStatusLine(line
)
135 # The status code is a three-digit number
137 self
.status
= status
= int(status
)
138 if status
< 100 or status
> 999:
139 raise BadStatusLine(line
)
141 raise BadStatusLine(line
)
142 self
.reason
= reason
.strip()
144 if version
== 'HTTP/1.0':
146 elif version
.startswith('HTTP/1.'):
147 self
.version
= 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
148 elif version
== 'HTTP/0.9':
151 raise UnknownProtocol(version
)
153 if self
.version
== 9:
154 self
.msg
= mimetools
.Message(StringIO())
157 self
.msg
= mimetools
.Message(self
.fp
, 0)
158 if self
.debuglevel
> 0:
159 for hdr
in self
.msg
.headers
:
160 print "header:", hdr
,
162 # don't let the msg keep an fp
165 # are we using the chunked-style of transfer encoding?
166 tr_enc
= self
.msg
.getheader('transfer-encoding')
168 if tr_enc
.lower() != 'chunked':
169 raise UnknownTransferEncoding()
171 self
.chunk_left
= None
175 # will the connection close at the end of the response?
176 conn
= self
.msg
.getheader('connection')
179 # a "Connection: close" will always close the connection. if we
180 # don't see that and this is not HTTP/1.1, then the connection will
181 # close unless we see a Keep-Alive header.
182 self
.will_close
= conn
.find('close') != -1 or \
183 ( self
.version
!= 11 and \
184 not self
.msg
.getheader('keep-alive') )
186 # for HTTP/1.1, the connection will always remain open
187 # otherwise, it will remain open IFF we see a Keep-Alive header
188 self
.will_close
= self
.version
!= 11 and \
189 not self
.msg
.getheader('keep-alive')
191 # do we have a Content-Length?
192 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
193 length
= self
.msg
.getheader('content-length')
194 if length
and not self
.chunked
:
196 self
.length
= int(length
)
202 # does the body have a fixed length? (of zero)
203 if (status
== 204 or # No Content
204 status
== 304 or # Not Modified
205 100 <= status
< 200): # 1xx codes
208 # if the connection remains open, and we aren't using chunked, and
209 # a content-length was not provided, then assume that the connection
211 if not self
.will_close
and \
212 not self
.chunked
and \
222 # NOTE: it is possible that we will not ever call self.close(). This
223 # case occurs when will_close is TRUE, length is None, and we
224 # read up to the last byte, but NOT past it.
226 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
227 # called, meaning self.isclosed() is meaningful.
228 return self
.fp
is None
230 def read(self
, amt
=None):
235 chunk_left
= self
.chunk_left
238 if chunk_left
is None:
239 line
= self
.fp
.readline()
242 line
= line
[:i
] # strip chunk-extensions
243 chunk_left
= int(line
, 16)
247 value
= value
+ self
._safe
_read
(chunk_left
)
248 elif amt
< chunk_left
:
249 value
= value
+ self
._safe
_read
(amt
)
250 self
.chunk_left
= chunk_left
- amt
252 elif amt
== chunk_left
:
253 value
= value
+ self
._safe
_read
(amt
)
254 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
255 self
.chunk_left
= None
258 value
= value
+ self
._safe
_read
(chunk_left
)
259 amt
= amt
- chunk_left
261 # we read the whole chunk, get another
262 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
265 # read and discard trailer up to the CRLF terminator
266 ### note: we shouldn't have any trailers!
268 line
= self
.fp
.readline()
272 # we read everything; close the "file"
282 s
= self
._safe
_read
(self
.length
)
283 self
.close() # we read everything
286 if self
.length
is not None:
287 if amt
> self
.length
:
288 # clip the read to the "end of response"
290 self
.length
= self
.length
- amt
292 # we do not use _safe_read() here because this may be a .will_close
293 # connection, and the user is reading more bytes than will be provided
294 # (for example, reading in 1k chunks)
295 s
= self
.fp
.read(amt
)
299 def _safe_read(self
, amt
):
300 """Read the number of bytes requested, compensating for partial reads.
302 Normally, we have a blocking socket, but a read() can be interrupted
303 by a signal (resulting in a partial read).
305 Note that we cannot distinguish between EOF and an interrupt when zero
306 bytes have been read. IncompleteRead() will be raised in this
309 This function should be used when <amt> bytes "should" be present for
310 reading. If the bytes are truly not available (due to EOF), then the
311 IncompleteRead exception can be used to detect the problem.
315 chunk
= self
.fp
.read(amt
)
317 raise IncompleteRead(s
)
319 amt
= amt
- len(chunk
)
322 def getheader(self
, name
, default
=None):
324 raise ResponseNotReady()
325 return self
.msg
.getheader(name
, default
)
328 class HTTPConnection
:
331 _http_vsn_str
= 'HTTP/1.1'
333 response_class
= HTTPResponse
334 default_port
= HTTP_PORT
338 def __init__(self
, host
, port
=None):
340 self
.__response
= None
341 self
.__state
= _CS_IDLE
343 self
._set
_hostport
(host
, port
)
345 def _set_hostport(self
, host
, port
):
349 port
= int(host
[i
+1:])
352 port
= self
.default_port
def set_debuglevel(self, level):
    """Set the debugging verbosity for this connection.

    `level' is stored as-is in self.debuglevel.  Other methods of the
    connection print diagnostic output when self.debuglevel > 0, so a
    value of 0 keeps the connection silent.
    """
    self.debuglevel = level
360 """Connect to the host and port specified in __init__."""
361 msg
= "getaddrinfo returns an empty list"
362 for res
in socket
.getaddrinfo(self
.host
, self
.port
, 0, socket
.SOCK_STREAM
):
363 af
, socktype
, proto
, canonname
, sa
= res
365 self
.sock
= socket
.socket(af
, socktype
, proto
)
366 if self
.debuglevel
> 0:
367 print "connect: (%s, %s)" % (self
.host
, self
.port
)
368 self
.sock
.connect(sa
)
369 except socket
.error
, msg
:
370 if self
.debuglevel
> 0:
371 print 'connect fail:', (self
.host
, self
.port
)
378 raise socket
.error
, msg
381 """Close the connection to the HTTP server."""
383 self
.sock
.close() # close it manually... there may be other refs
386 self
.__response
.close()
387 self
.__response
= None
388 self
.__state
= _CS_IDLE
391 """Send `str' to the server."""
392 if self
.sock
is None:
398 # send the data to the server. if we get a broken pipe, then close
399 # the socket. we want to reconnect when somebody tries to send again.
401 # NOTE: we DO propagate the error, though, because we cannot simply
402 # ignore the error... the caller will know if they can retry.
403 if self
.debuglevel
> 0:
404 print "send:", repr(str)
407 except socket
.error
, v
:
408 if v
[0] == 32: # Broken pipe
412 def putrequest(self
, method
, url
):
413 """Send a request to the server.
415 `method' specifies an HTTP request method, e.g. 'GET'.
416 `url' specifies the object being requested, e.g. '/index.html'.
419 # check if a prior response has been completed
420 if self
.__response
and self
.__response
.isclosed():
421 self
.__response
= None
424 # in certain cases, we cannot issue another request on this connection.
426 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
427 # 2) a response to a previous request has signalled that it is going
428 # to close the connection upon completion.
429 # 3) the headers for the previous response have not been read, thus
430 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
432 # if there is no prior response, then we can request at will.
434 # if point (2) is true, then we will have passed the socket to the
435 # response (effectively meaning, "there is no prior response"), and
436 # will open a new one when a new request is made.
438 # Note: if a prior response exists, then we *can* start a new request.
439 # We are not allowed to begin fetching the response to this new
440 # request, however, until that prior response is complete.
442 if self
.__state
== _CS_IDLE
:
443 self
.__state
= _CS_REQ_STARTED
445 raise CannotSendRequest()
449 str = '%s %s %s\r\n' % (method
, url
, self
._http
_vsn
_str
)
453 except socket
.error
, v
:
454 # trap 'Broken pipe' if we're allowed to automatically reconnect
455 if v
[0] != 32 or not self
.auto_open
:
457 # try one more time (the socket was closed; this will reopen)
460 if self
._http
_vsn
== 11:
461 # Issue some standard headers for better HTTP/1.1 compliance
463 # this header is issued *only* for HTTP/1.1 connections. more
464 # specifically, this means it is only issued when the client uses
465 # the new HTTPConnection() class. backwards-compat clients will
466 # be using HTTP/1.0 and those clients may be issuing this header
467 # themselves. we should NOT issue it twice; some web servers (such
468 # as Apache) barf when they see two Host: headers
470 # if we need a non-standard port, include it in the header
471 if self
.port
== HTTP_PORT
:
472 self
.putheader('Host', self
.host
)
474 self
.putheader('Host', "%s:%s" % (self
.host
, self
.port
))
476 # note: we are assuming that clients will not attempt to set these
477 # headers since *this* library must deal with the
478 # consequences. this also means that when the supporting
479 # libraries are updated to recognize other forms, then this
480 # code should be changed (removed or updated).
482 # we only want a Content-Encoding of "identity" since we don't
483 # support encodings such as x-gzip or x-deflate.
484 self
.putheader('Accept-Encoding', 'identity')
486 # we can accept "chunked" Transfer-Encodings, but no others
487 # NOTE: no TE header implies *only* "chunked"
488 #self.putheader('TE', 'chunked')
490 # if TE is supplied in the header, then it must appear in a
492 #self.putheader('Connection', 'TE')
495 # For HTTP/1.0, the server will assume "not chunked"
498 def putheader(self
, header
, value
):
499 """Send a request header line to the server.
501 For example: h.putheader('Accept', 'text/html')
503 if self
.__state
!= _CS_REQ_STARTED
:
504 raise CannotSendHeader()
506 str = '%s: %s\r\n' % (header
, value
)
509 def endheaders(self
):
510 """Indicate that the last header line has been sent to the server."""
512 if self
.__state
== _CS_REQ_STARTED
:
513 self
.__state
= _CS_REQ_SENT
515 raise CannotSendHeader()
519 def request(self
, method
, url
, body
=None, headers
={}):
520 """Send a complete request to the server."""
523 self
._send
_request
(method
, url
, body
, headers
)
524 except socket
.error
, v
:
525 # trap 'Broken pipe' if we're allowed to automatically reconnect
526 if v
[0] != 32 or not self
.auto_open
:
529 self
._send
_request
(method
, url
, body
, headers
)
531 def _send_request(self
, method
, url
, body
, headers
):
532 self
.putrequest(method
, url
)
535 self
.putheader('Content-Length', str(len(body
)))
536 for hdr
, value
in headers
.items():
537 self
.putheader(hdr
, value
)
543 def getresponse(self
):
544 "Get the response from the server."
546 # check if a prior response has been completed
547 if self
.__response
and self
.__response
.isclosed():
548 self
.__response
= None
551 # if a prior response exists, then it must be completed (otherwise, we
552 # cannot read this response's header to determine the connection-close
555 # note: if a prior response existed, but was connection-close, then the
556 # socket and response were made independent of this HTTPConnection
557 # object since a new request requires that we open a whole new
560 # this means the prior response had one of two states:
561 # 1) will_close: this connection was reset and the prior socket and
562 # response operate independently
563 # 2) persistent: the response was retained and we await its
564 # isclosed() status to become true.
566 if self
.__state
!= _CS_REQ_SENT
or self
.__response
:
567 raise ResponseNotReady()
569 if self
.debuglevel
> 0:
570 response
= self
.response_class(self
.sock
, self
.debuglevel
)
572 response
= self
.response_class(self
.sock
)
575 self
.__state
= _CS_IDLE
577 if response
.will_close
:
578 # this effectively passes the connection to the response
581 # remember this, so we can tell when it is complete
582 self
.__response
= response
588 def __init__(self
, sock
, ssl
):
592 def makefile(self
, mode
, bufsize
=None):
593 """Return a readable file-like object with data from socket.
595 This method offers only partial support for the makefile
596 interface of a real socket. It only supports modes 'r' and
597 'rb' and the bufsize argument is ignored.
599 The returned object contains *all* of the file data
601 if mode
!= 'r' and mode
!= 'rb':
602 raise UnimplementedFileMode()
607 buf
= self
.__ssl
.read()
608 except socket
.sslerror
, err
:
609 if (err
[0] == socket
.SSL_ERROR_WANT_READ
610 or err
[0] == socket
.SSL_ERROR_WANT_WRITE
613 if err
[0] == socket
.SSL_ERROR_ZERO_RETURN
:
616 except socket
.error
, err
:
617 if err
[0] == errno
.EINTR
:
623 return StringIO("".join(msgbuf
))
def send(self, stuff, flags=0):
    """Write `stuff' through the SSL object and return its result.

    The call is delegated to the write() method of the object held in
    self.__ssl.  `flags' is accepted so the signature lines up with a
    socket-style send(), but it is not used.
    """
    return self.__ssl.write(stuff)
def recv(self, len=1024, flags=0):
    """Read up to `len' bytes through the SSL object and return them.

    The call is delegated to the read() method of the object held in
    self.__ssl.  `flags' is accepted so the signature lines up with a
    socket-style recv(), but it is not used.

    Note: the parameter name `len' shadows the builtin inside this
    method; it is kept because callers may pass it by keyword.
    """
    return self.__ssl.read(len)
def __getattr__(self, attr):
    """Forward any other attribute lookup to the wrapped socket.

    Python calls __getattr__ only after normal attribute lookup has
    failed, so names defined on this object take precedence and
    everything else is fetched from the object held in self.__sock.
    """
    return getattr(self.__sock, attr)
635 class HTTPSConnection(HTTPConnection
):
636 "This class allows communication via SSL."
638 default_port
= HTTPS_PORT
640 def __init__(self
, host
, port
=None, **x509
):
643 keys
.remove('key_file')
647 keys
.remove('cert_file')
651 raise IllegalKeywordArgument()
652 HTTPConnection
.__init
__(self
, host
, port
)
653 self
.key_file
= x509
.get('key_file')
654 self
.cert_file
= x509
.get('cert_file')
657 "Connect to a host on a given (SSL) port."
659 sock
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
660 sock
.connect((self
.host
, self
.port
))
662 if hasattr(sock
, "_sock"):
663 realsock
= sock
._sock
664 ssl
= socket
.ssl(realsock
, self
.key_file
, self
.cert_file
)
665 self
.sock
= FakeSocket(sock
, ssl
)
669 "Compatibility class with httplib.py from 1.5."
672 _http_vsn_str
= 'HTTP/1.0'
676 _connection_class
= HTTPConnection
678 def __init__(self
, host
='', port
=None):
679 "Provide a default host, since the superclass requires one."
681 # some joker passed 0 explicitly, meaning default port
685 # Note that we may pass an empty string as the host; this will throw
686 # an error when we attempt to connect. Presumably, the client code
687 # will call connect before then, with a proper host.
688 self
._setup
(self
._connection
_class
(host
, port
))
690 def _setup(self
, conn
):
693 # set up delegation to flesh out interface
694 self
.send
= conn
.send
695 self
.putrequest
= conn
.putrequest
696 self
.endheaders
= conn
.endheaders
697 self
.set_debuglevel
= conn
.set_debuglevel
699 conn
._http
_vsn
= self
._http
_vsn
700 conn
._http
_vsn
_str
= self
._http
_vsn
_str
704 def connect(self
, host
=None, port
=None):
705 "Accept arguments to set the host/port, since the superclass doesn't."
708 self
._conn
._set
_hostport
(host
, port
)
712 "Provide a getfile, since the superclass' does not use this concept."
def putheader(self, header, *values):
    """Send a request header that may carry several values.

    The underlying connection's putheader() takes a single value, so the
    values given here are joined with '\\r\\n\\t' (a folded continuation
    line) into one string before being passed on to self._conn.

    Every item in `values' must be a string; str.join() rejects anything
    else.
    """
    self._conn.putheader(header, '\r\n\t'.join(values))
720 """Compat definition since superclass does not define it.
722 Returns a tuple consisting of:
723 - server status code (e.g. '200' if all goes well)
724 - server "reason" corresponding to status code
725 - any RFC822 headers in the response from the server
728 response
= self
._conn
.getresponse()
729 except BadStatusLine
, e
:
730 ### hmm. if getresponse() ever closes the socket on a bad request,
731 ### then we are going to have problems with self.sock
733 ### should we keep this behavior? do people use it?
734 # keep the socket open (as a file), and return it
735 self
.file = self
._conn
.sock
.makefile('rb', 0)
737 # close our socket -- we want to restart after any protocol error
741 return -1, e
.line
, None
743 self
.headers
= response
.msg
744 self
.file = response
.fp
745 return response
.status
, response
.reason
, response
.msg
750 # note that self.file == response.fp, which gets closed by the
751 # superclass. just clear the object ref here.
752 ### hmm. messy. if status==-1, then self.file is owned by us.
753 ### well... we aren't explicitly closing, but losing this ref will
757 if hasattr(socket
, 'ssl'):
759 """Compatibility with 1.5 httplib interface
761 Python 1.5.2 did not have an HTTPS class, but it defined an
762 interface for sending http requests that is also useful for
766 _connection_class
= HTTPSConnection
768 def __init__(self
, host
='', port
=None, **x509
):
769 # provide a default host, pass the X509 cert info
771 # urf. compensate for bad input.
774 self
._setup
(self
._connection
_class
(host
, port
, **x509
))
776 # we never actually use these for anything, but we keep them
777 # here for compatibility with post-1.5.2 CVS.
778 self
.key_file
= x509
.get('key_file')
779 self
.cert_file
= x509
.get('cert_file')
782 class HTTPException(Exception):
785 class NotConnected(HTTPException
):
class UnknownProtocol(HTTPException):
    """Raised with the HTTP-Version string of a response that is not handled.

    The offending version string is available as the `version' attribute.
    """

    def __init__(self, version):
        # The base initializer is not invoked here, so populate args
        # explicitly; this keeps str()/repr() of the exception informative
        # on interpreters where args is only filled in by Exception.__init__.
        self.args = (version,)
        self.version = version
792 class UnknownTransferEncoding(HTTPException
):
795 class IllegalKeywordArgument(HTTPException
):
798 class UnimplementedFileMode(HTTPException
):
class IncompleteRead(HTTPException):
    """Raised with whatever data was read before the read came up short.

    The data collected so far is available as the `partial' attribute.
    """

    def __init__(self, partial):
        # The base initializer is not invoked here, so populate args
        # explicitly; this keeps str()/repr() of the exception informative
        # on interpreters where args is only filled in by Exception.__init__.
        self.args = (partial,)
        self.partial = partial
805 class ImproperConnectionState(HTTPException
):
808 class CannotSendRequest(ImproperConnectionState
):
811 class CannotSendHeader(ImproperConnectionState
):
814 class ResponseNotReady(ImproperConnectionState
):
817 class BadStatusLine(HTTPException
):
818 def __init__(self
, line
):
821 # for backwards compatibility
822 error
= HTTPException
826 # snarfed from httplib.py for now...
831 The test consists of retrieving and displaying the Python
832 home page, along with the error code and error string returned
833 by the www.python.org server.
838 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'd')
841 if o
== '-d': dl
= dl
+ 1
842 host
= 'www.python.org'
844 if args
[0:]: host
= args
[0]
845 if args
[1:]: selector
= args
[1]
849 h
.putrequest('GET', selector
)
851 status
, reason
, headers
= h
.getreply()
852 print 'status =', status
853 print 'reason =', reason
856 for header
in headers
.headers
: print header
.strip()
858 print h
.getfile().read()
860 if hasattr(socket
, 'ssl'):
861 host
= 'sourceforge.net'
862 selector
= '/projects/python'
865 hs
.putrequest('GET', selector
)
867 status
, reason
, headers
= hs
.getreply()
868 print 'status =', status
869 print 'reason =', reason
872 for header
in headers
.headers
: print header
.strip()
874 print hs
.getfile().read()
877 if __name__
== '__main__':