1 """HTTP/1.1 client library
3 <intro stuff goes here>
6 HTTPConnection objects go through a number of "states", which define when a
7 client may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
20 | ( putheader() )* endheaders()
24 | response = getresponse()
26 Unread-response [Response-headers-read]
27 |\____________________
29 | response.read() | putrequest()
31 Idle Req-started-unread-response
34 response.read() | | ( putheader() )* endheaders()
36 Request-started Req-sent-unread-response
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
59 Logical State __state __response
60 ------------- ------- ----------
62 Request-started _CS_REQ_STARTED None
63 Request-sent _CS_REQ_SENT None
64 Unread-response _CS_IDLE <response_class>
65 Req-started-unread-response _CS_REQ_STARTED <response_class>
66 Req-sent-unread-response _CS_REQ_SENT <response_class>
74 from cStringIO
import StringIO
76 from StringIO
import StringIO
85 _CS_REQ_STARTED
= 'Request-started'
86 _CS_REQ_SENT
= 'Request-sent'
90 def __init__(self
, sock
):
91 self
.fp
= sock
.makefile('rb', 0)
95 # from the Status-Line of the response
96 self
.version
= _UNKNOWN
# HTTP-Version
97 self
.status
= _UNKNOWN
# Status-Code
98 self
.reason
= _UNKNOWN
# Reason-Phrase
100 self
.chunked
= _UNKNOWN
# is "chunked" being used?
101 self
.chunk_left
= _UNKNOWN
# bytes left to read in current chunk
102 self
.length
= _UNKNOWN
# number of bytes left in response
103 self
.will_close
= _UNKNOWN
# conn will close at end of response
106 if self
.msg
is not None:
107 # we've already started reading the response
110 line
= self
.fp
.readline()
112 [version
, status
, reason
] = string
.split(line
, None, 2)
115 [version
, status
] = string
.split(line
, None, 1)
119 raise BadStatusLine(line
)
120 if version
[:5] != 'HTTP/':
122 raise BadStatusLine(line
)
124 self
.status
= status
= int(status
)
125 self
.reason
= string
.strip(reason
)
127 if version
== 'HTTP/1.0':
129 elif version
[:7] == 'HTTP/1.':
130 self
.version
= 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
132 raise UnknownProtocol(version
)
134 self
.msg
= mimetools
.Message(self
.fp
, 0)
136 # don't let the msg keep an fp
139 # are we using the chunked-style of transfer encoding?
140 tr_enc
= self
.msg
.getheader('transfer-encoding')
142 if string
.lower(tr_enc
) != 'chunked':
143 raise UnknownTransferEncoding()
145 self
.chunk_left
= None
149 # will the connection close at the end of the response?
150 conn
= self
.msg
.getheader('connection')
152 conn
= string
.lower(conn
)
153 # a "Connection: close" will always close the connection. if we
154 # don't see that and this is not HTTP/1.1, then the connection will
155 # close unless we see a Keep-Alive header.
156 self
.will_close
= string
.find(conn
, 'close') != -1 or \
157 ( self
.version
!= 11 and \
158 not self
.msg
.getheader('keep-alive') )
160 # for HTTP/1.1, the connection will always remain open
161 # otherwise, it will remain open IFF we see a Keep-Alive header
162 self
.will_close
= self
.version
!= 11 and \
163 not self
.msg
.getheader('keep-alive')
165 # do we have a Content-Length?
166 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
167 length
= self
.msg
.getheader('content-length')
168 if length
and not self
.chunked
:
169 self
.length
= int(length
)
173 # does the body have a fixed length? (of zero)
174 if (status
== 204 or # No Content
175 status
== 304 or # Not Modified
176 100 <= status
< 200): # 1xx codes
179 # if the connection remains open, and we aren't using chunked, and
180 # a content-length was not provided, then assume that the connection
182 if not self
.will_close
and \
183 not self
.chunked
and \
187 # if there is no body, then close NOW. read() may never be called, thus
188 # we will never mark self as closed.
198 # NOTE: it is possible that we will not ever call self.close(). This
199 # case occurs when will_close is TRUE, length is None, and we
200 # read up to the last byte, but NOT past it.
202 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
203 # called, meaning self.isclosed() is meaningful.
204 return self
.fp
is None
206 def read(self
, amt
=None):
211 chunk_left
= self
.chunk_left
214 if chunk_left
is None:
215 line
= self
.fp
.readline()
216 i
= string
.find(line
, ';')
218 line
= line
[:i
] # strip chunk-extensions
219 chunk_left
= string
.atoi(line
, 16)
223 value
= value
+ self
._safe
_read
(chunk_left
)
224 elif amt
< chunk_left
:
225 value
= value
+ self
._safe
_read
(amt
)
226 self
.chunk_left
= chunk_left
- amt
228 elif amt
== chunk_left
:
229 value
= value
+ self
._safe
_read
(amt
)
230 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
231 self
.chunk_left
= None
234 value
= value
+ self
._safe
_read
(chunk_left
)
235 amt
= amt
- chunk_left
237 # we read the whole chunk, get another
238 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
241 # read and discard trailer up to the CRLF terminator
242 ### note: we shouldn't have any trailers!
244 line
= self
.fp
.readline()
248 # we read everything; close the "file"
258 s
= self
._safe
_read
(self
.length
)
259 self
.close() # we read everything
262 if self
.length
is not None:
263 if amt
> self
.length
:
264 # clip the read to the "end of response"
266 self
.length
= self
.length
- amt
268 # we do not use _safe_read() here because this may be a .will_close
269 # connection, and the user is reading more bytes than will be provided
270 # (for example, reading in 1k chunks)
271 s
= self
.fp
.read(amt
)
273 # close our "file" if we know we should
274 ### I'm not sure about the len(s) < amt part; we should be safe because
275 ### we shouldn't be using non-blocking sockets
276 if self
.length
== 0 or len(s
) < amt
:
281 def _safe_read(self
, amt
):
282 """Read the number of bytes requested, compensating for partial reads.
284 Normally, we have a blocking socket, but a read() can be interrupted
285 by a signal (resulting in a partial read).
287 Note that we cannot distinguish between EOF and an interrupt when zero
288 bytes have been read. IncompleteRead() will be raised in this situation.
291 This function should be used when <amt> bytes "should" be present for
292 reading. If the bytes are truly not available (due to EOF), then the
293 IncompleteRead exception can be used to detect the problem.
297 chunk
= self
.fp
.read(amt
)
299 raise IncompleteRead(s
)
301 amt
= amt
- len(chunk
)
304 def getheader(self
, name
, default
=None):
306 raise ResponseNotReady()
307 return self
.msg
.getheader(name
, default
)
310 class HTTPConnection
:
313 _http_vsn_str
= 'HTTP/1.1'
315 response_class
= HTTPResponse
316 default_port
= HTTP_PORT
319 def __init__(self
, host
, port
=None):
321 self
.__response
= None
322 self
.__state
= _CS_IDLE
324 self
._set
_hostport
(host
, port
)
326 def _set_hostport(self
, host
, port
):
328 i
= string
.find(host
, ':')
330 port
= int(host
[i
+1:])
333 port
= self
.default_port
338 """Connect to the host and port specified in __init__."""
339 self
.sock
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
340 self
.sock
.connect((self
.host
, self
.port
))
343 """Close the connection to the HTTP server."""
345 self
.sock
.close() # close it manually... there may be other refs
348 self
.__response
.close()
349 self
.__response
= None
350 self
.__state
= _CS_IDLE
353 """Send `str' to the server."""
354 if self
.sock
is None:
360 # send the data to the server. if we get a broken pipe, then close
361 # the socket. we want to reconnect when somebody tries to send again.
363 # NOTE: we DO propagate the error, though, because we cannot simply
364 # ignore the error... the caller will know if they can retry.
367 except socket
.error
, v
:
368 if v
[0] == 32: # Broken pipe
372 def putrequest(self
, method
, url
):
373 """Send a request to the server.
375 `method' specifies an HTTP request method, e.g. 'GET'.
376 `url' specifies the object being requested, e.g. '/index.html'.
379 # check if a prior response has been completed
380 if self
.__response
and self
.__response
.isclosed():
381 self
.__response
= None
384 # in certain cases, we cannot issue another request on this connection.
386 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
387 # 2) a response to a previous request has signalled that it is going
388 # to close the connection upon completion.
389 # 3) the headers for the previous response have not been read, thus
390 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
392 # if there is no prior response, then we can request at will.
394 # if point (2) is true, then we will have passed the socket to the
395 # response (effectively meaning, "there is no prior response"), and
396 # will open a new one when a new request is made.
398 # Note: if a prior response exists, then we *can* start a new request.
399 # We are not allowed to begin fetching the response to this new
400 # request, however, until that prior response is complete.
402 if self
.__state
== _CS_IDLE
:
403 self
.__state
= _CS_REQ_STARTED
405 raise CannotSendRequest()
409 str = '%s %s %s\r\n' % (method
, url
, self
._http
_vsn
_str
)
413 except socket
.error
, v
:
414 # trap 'Broken pipe' if we're allowed to automatically reconnect
415 if v
[0] != 32 or not self
.auto_open
:
417 # try one more time (the socket was closed; this will reopen)
420 if self
._http
_vsn
== 11:
421 # Issue some standard headers for better HTTP/1.1 compliance
423 # this header is issued *only* for HTTP/1.1 connections. more
424 # specifically, this means it is only issued when the client uses
425 # the new HTTPConnection() class. backwards-compat clients will
426 # be using HTTP/1.0 and those clients may be issuing this header
427 # themselves. we should NOT issue it twice; some web servers (such
428 # as Apache) barf when they see two Host: headers
429 self
.putheader('Host', self
.host
)
431 # note: we are assuming that clients will not attempt to set these
432 # headers since *this* library must deal with the
433 # consequences. this also means that when the supporting
434 # libraries are updated to recognize other forms, then this
435 # code should be changed (removed or updated).
437 # we only want a Content-Encoding of "identity" since we don't
438 # support encodings such as x-gzip or x-deflate.
439 self
.putheader('Accept-Encoding', 'identity')
441 # we can accept "chunked" Transfer-Encodings, but no others
442 # NOTE: no TE header implies *only* "chunked"
443 #self.putheader('TE', 'chunked')
445 # if TE is supplied in the header, then it must appear in a
447 #self.putheader('Connection', 'TE')
450 # For HTTP/1.0, the server will assume "not chunked"
453 def putheader(self
, header
, value
):
454 """Send a request header line to the server.
456 For example: h.putheader('Accept', 'text/html')
458 if self
.__state
!= _CS_REQ_STARTED
:
459 raise CannotSendHeader()
461 str = '%s: %s\r\n' % (header
, value
)
464 def endheaders(self
):
465 """Indicate that the last header line has been sent to the server."""
467 if self
.__state
== _CS_REQ_STARTED
:
468 self
.__state
= _CS_REQ_SENT
470 raise CannotSendHeader()
474 def request(self
, method
, url
, body
=None, headers
={}):
475 """Send a complete request to the server."""
478 self
._send
_request
(method
, url
, body
, headers
)
479 except socket
.error
, v
:
480 # trap 'Broken pipe' if we're allowed to automatically reconnect
481 if v
[0] != 32 or not self
.auto_open
:
484 self
._send
_request
(method
, url
, body
, headers
)
486 def _send_request(self
, method
, url
, body
, headers
):
487 self
.putrequest(method
, url
)
490 self
.putheader('Content-Length', str(len(body
)))
491 for hdr
, value
in headers
.items():
492 self
.putheader(hdr
, value
)
498 def getresponse(self
):
499 "Get the response from the server."
501 # check if a prior response has been completed
502 if self
.__response
and self
.__response
.isclosed():
503 self
.__response
= None
506 # if a prior response exists, then it must be completed (otherwise, we
507 # cannot read this response's header to determine the connection-close
510 # note: if a prior response existed, but was connection-close, then the
511 # socket and response were made independent of this HTTPConnection
512 # object since a new request requires that we open a whole new
515 # this means the prior response had one of two states:
516 # 1) will_close: this connection was reset and the prior socket and
517 # response operate independently
518 # 2) persistent: the response was retained and we await its
519 # isclosed() status to become true.
521 if self
.__state
!= _CS_REQ_SENT
or self
.__response
:
522 raise ResponseNotReady()
524 response
= self
.response_class(self
.sock
)
527 self
.__state
= _CS_IDLE
529 if response
.will_close
:
530 # this effectively passes the connection to the response
533 # remember this, so we can tell when it is complete
534 self
.__response
= response
540 def __init__(self
, sock
, ssl
):
544 def makefile(self
, mode
): # hopefully, never have to write
545 if mode
!= 'r' and mode
!= 'rb':
546 raise UnimplementedFileMode()
551 msgbuf
= msgbuf
+ self
.__ssl
.read()
552 except socket
.sslerror
, msg
:
554 return StringIO(msgbuf
)
def send(self, stuff, flags = 0):
    """Write `stuff' to the SSL object and return what its write() returns.

    `flags' is accepted but is not used.
    """
    result = self.__ssl.write(stuff)
    return result
def recv(self, len = 1024, flags = 0):
    """Read from the SSL object, passing `len' (default 1024) to its read().

    `flags' is accepted but is not used.
    """
    data = self.__ssl.read(len)
    return data
def __getattr__(self, attr):
    """Look up `attr' on the object held in self.__sock and return it."""
    target = self.__sock
    return getattr(target, attr)
566 class HTTPSConnection(HTTPConnection
):
567 "This class allows communication via SSL."
569 default_port
= HTTPS_PORT
571 def __init__(self
, host
, port
=None, **x509
):
574 keys
.remove('key_file')
578 keys
.remove('cert_file')
582 raise IllegalKeywordArgument()
583 HTTPConnection
.__init
__(self
, host
, port
)
584 self
.key_file
= x509
.get('key_file')
585 self
.cert_file
= x509
.get('cert_file')
588 "Connect to a host on a given (SSL) port."
590 sock
= socket
.socket(socket
.AF_INET
, socket
.SOCK_STREAM
)
591 sock
.connect((self
.host
, self
.port
))
592 ssl
= socket
.ssl(sock
, self
.key_file
, self
.cert_file
)
593 self
.sock
= FakeSocket(sock
, ssl
)
597 "Compatibility class with httplib.py from 1.5."
600 _http_vsn_str
= 'HTTP/1.0'
604 _connection_class
= HTTPConnection
606 def __init__(self
, host
='', port
=None, **x509
):
607 "Provide a default host, since the superclass requires one."
609 # some joker passed 0 explicitly, meaning default port
613 # Note that we may pass an empty string as the host; this will throw
614 # an error when we attempt to connect. Presumably, the client code
615 # will call connect before then, with a proper host.
616 self
._conn
= self
._connection
_class
(host
, port
)
617 # set up delegation to flesh out interface
618 self
.send
= self
._conn
.send
619 self
.putrequest
= self
._conn
.putrequest
620 self
.endheaders
= self
._conn
.endheaders
622 # we never actually use these for anything, but we keep them here for
623 # compatibility with post-1.5.2 CVS.
624 self
.key_file
= x509
.get('key_file')
625 self
.cert_file
= x509
.get('cert_file')
629 def connect(self
, host
=None, port
=None):
630 "Accept arguments to set the host/port, since the superclass doesn't."
633 self
._conn
._set
_hostport
(host
, port
)
636 def set_debuglevel(self
, debuglevel
):
637 "The class no longer supports the debuglevel."
641 "Provide a getfile, since the superclass' does not use this concept."
def putheader(self, header, *values):
    """Send one header line built from any number of values.

    The wrapped connection's putheader() takes a single value, so the
    values given here are joined with CRLF + TAB and passed on as one
    value.

    Each value is rendered with '%s' before joining, so that non-string
    values (for example an integer Content-Length) are accepted here just
    as they are by the wrapped connection's own '%s'-based formatting.
    """
    rendered = ['%s' % (value,) for value in values]
    self._conn.putheader(header, '\r\n\t'.join(rendered))
650 """Compat definition since superclass does not define it.
652 Returns a tuple consisting of:
653 - server status code (e.g. '200' if all goes well)
654 - server "reason" corresponding to status code
655 - any RFC822 headers in the response from the server
658 response
= self
._conn
.getresponse()
659 except BadStatusLine
, e
:
660 ### hmm. if getresponse() ever closes the socket on a bad request,
661 ### then we are going to have problems with self.sock
663 ### should we keep this behavior? do people use it?
664 # keep the socket open (as a file), and return it
665 self
.file = self
._conn
.sock
.makefile('rb', 0)
667 # close our socket -- we want to restart after any protocol error
671 return -1, e
.line
, None
673 self
.headers
= response
.msg
674 self
.file = response
.fp
675 return response
.status
, response
.reason
, response
.msg
680 # note that self.file == response.fp, which gets closed by the
681 # superclass. just clear the object ref here.
682 ### hmm. messy. if status==-1, then self.file is owned by us.
683 ### well... we aren't explicitly closing, but losing this ref will
687 if hasattr(socket
, 'ssl'):
689 """Compatibility with 1.5 httplib interface
691 Python 1.5.2 did not have an HTTPS class, but it defined an
692 interface for sending http requests that is also useful for
696 _connection_class
= HTTPSConnection
699 class HTTPException(Exception):
702 class NotConnected(HTTPException
):
class UnknownProtocol(HTTPException):
    """Raised with the protocol version string that was not recognised.

    The version string is kept on the `version' attribute.
    """

    def __init__(self, version):
        # Initialise the base exception as well, so that exc.args and
        # str(exc) carry the version instead of being empty.
        HTTPException.__init__(self, version)
        self.version = version
709 class UnknownTransferEncoding(HTTPException
):
712 class IllegalKeywordArgument(HTTPException
):
715 class UnimplementedFileMode(HTTPException
):
class IncompleteRead(HTTPException):
    """Raised when a read ends before the requested bytes were obtained.

    The data that was read before the failure is kept on the `partial'
    attribute.
    """

    def __init__(self, partial):
        # Initialise the base exception as well, so that exc.args is
        # populated rather than left empty.
        HTTPException.__init__(self, partial)
        self.partial = partial
722 class ImproperConnectionState(HTTPException
):
725 class CannotSendRequest(ImproperConnectionState
):
728 class CannotSendHeader(ImproperConnectionState
):
731 class ResponseNotReady(ImproperConnectionState
):
734 class BadStatusLine(HTTPException
):
735 def __init__(self
, line
):
738 # for backwards compatibility
739 error
= HTTPException
743 # snarfed from httplib.py for now...
748 The test consists of retrieving and displaying the Python
749 home page, along with the error code and error string returned
750 by the www.python.org server.
755 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'd')
758 if o
== '-d': dl
= dl
+ 1
759 host
= 'www.python.org'
761 if args
[0:]: host
= args
[0]
762 if args
[1:]: selector
= args
[1]
766 h
.putrequest('GET', selector
)
768 status
, reason
, headers
= h
.getreply()
769 print 'status =', status
770 print 'reason =', reason
773 for header
in headers
.headers
: print string
.strip(header
)
775 print h
.getfile().read()
777 if hasattr(socket
, 'ssl'):
778 host
= 'sourceforge.net'
781 hs
.putrequest('GET', selector
)
783 status
, reason
, headers
= hs
.getreply()
784 print 'status =', status
785 print 'reason =', reason
788 for header
in headers
.headers
: print string
.strip(header
)
790 print hs
.getfile().read()
793 if __name__
== '__main__':