Bump version to 0.9.1.
[python/dscho.git] / Lib / httplib.py
blob395ffbf978681839dffceace2f0c30ab762b8163
1 """HTTP/1.1 client library
3 <intro stuff goes here>
4 <other stuff, too>
HTTPConnection objects go through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                    /       |
    response.read() |       | ( putheader() )*  endheaders()
                    v       v
        Request-started   Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67 """
import errno
import mimetools
import socket
import string
import sys

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
# Default TCP ports for plain and SSL connections.
HTTP_PORT, HTTPS_PORT = 80, 443

# Placeholder held by HTTPResponse attributes until begin() fills them in.
_UNKNOWN = 'UNKNOWN'

# Connection states tracked by HTTPConnection (see the module docstring).
(_CS_IDLE,
 _CS_REQ_STARTED,
 _CS_REQ_SENT) = ('Idle', 'Request-started', 'Request-sent')
class HTTPResponse:
    """One HTTP response, read from a file object made from a socket.

    Call begin() to parse the status line and headers, then read() to
    fetch the body.  The object closes its own file once the whole body
    has been consumed; isclosed() reports that.
    """

    def __init__(self, sock):
        # bufsize 0 asks for an unbuffered file object
        self.fp = sock.makefile('rb', 0)

        self.msg = None                 # header object, set by begin()

        # from the Status-Line of the response
        self.version = _UNKNOWN         # HTTP-Version
        self.status = _UNKNOWN          # Status-Code
        self.reason = _UNKNOWN          # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def begin(self):
        """Read and parse the status line and the headers.

        Does nothing if the headers have already been read.

        Raises BadStatusLine when the status line cannot be parsed
        (including a non-numeric status code), UnknownProtocol for a
        version other than HTTP/1.x, and UnknownTransferEncoding for a
        Transfer-Encoding other than "chunked".
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        line = self.fp.readline()
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                # the Reason-Phrase may be missing
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                self.close()
                raise BadStatusLine(line)
        if version[:5] != 'HTTP/':
            self.close()
            raise BadStatusLine(line)

        # a status code that is not a number is a malformed status line,
        # not a programming error: report it as such
        try:
            status = int(status)
        except ValueError:
            self.close()
            raise BadStatusLine(line)
        self.status = status
        self.reason = reason.strip()

        if version == 'HTTP/1.0':
            self.version = 10
        elif version[:7] == 'HTTP/1.':
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        else:
            raise UnknownProtocol(version)

        self.msg = mimetools.Message(self.fp, 0)

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc:
            if tr_enc.lower() != 'chunked':
                raise UnknownTransferEncoding()
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        conn = self.msg.getheader('connection')
        if conn:
            conn = conn.lower()
            # a "Connection: close" will always close the connection. if we
            # don't see that and this is not HTTP/1.1, then the connection
            # will close unless we see a Keep-Alive header.
            self.will_close = conn.find('close') != -1 or \
                              (self.version != 11 and
                               not self.msg.getheader('keep-alive'))
        else:
            # for HTTP/1.1, the connection will always remain open
            # otherwise, it will remain open IFF we see a Keep-Alive header
            self.will_close = self.version != 11 and \
                              not self.msg.getheader('keep-alive')

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                # unusable header: treat the length as unknown
                self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == 204 or            # No Content
            status == 304 or            # Not Modified
            100 <= status < 200):       # 1xx codes
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

        # if there is no body, then close NOW. read() may never be called,
        # thus we will never mark self as closed.
        if self.length == 0:
            self.close()

    def close(self):
        """Close the underlying file object, if it is still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return true once this response has closed its file object."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    def read(self, amt=None):
        """Read and return up to `amt' bytes of the body (all if None).

        Returns '' once the response has been closed.
        """
        if self.fp is None:
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.will_close:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length
            self.length = self.length - amt

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be
        # provided (for example, reading in 1k chunks)
        s = self.fp.read(amt)

        # close our "file" if we know we should
        ### I'm not sure about the len(s) < amt part; we should be safe
        ### because we shouldn't be using non-blocking sockets
        if self.length == 0 or len(s) < amt:
            self.close()

        return s

    def _read_chunked(self, amt):
        """Read from a "chunked" body; helper for read().

        Raises IncompleteRead (carrying what was read by this call) when
        a chunk-size line is missing or is not a hexadecimal number.
        """
        chunk_left = self.chunk_left
        value = ''
        while 1:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i]     # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # EOF or garbage where a chunk size was expected
                    self.close()
                    raise IncompleteRead(value)
                if chunk_left == 0:
                    break
            if amt is None:
                value = value + self._safe_read(chunk_left)
            elif amt < chunk_left:
                value = value + self._safe_read(amt)
                self.chunk_left = chunk_left - amt
                return value
            elif amt == chunk_left:
                value = value + self._safe_read(amt)
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return value
            else:
                value = value + self._safe_read(chunk_left)
                amt = amt - chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while 1:
            line = self.fp.readline()
            # an empty string means EOF: stop instead of looping forever
            if not line or line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return value

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when
        zero bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present
        for reading. If the bytes are truly not available (due to EOF), then
        the IncompleteRead exception can be used to detect the problem.
        """
        s = ''
        while amt > 0:
            chunk = self.fp.read(amt)
            if not chunk:
                raise IncompleteRead(s)
            s = s + chunk
            amt = amt - len(chunk)
        return s

    def getheader(self, name, default=None):
        """Return the value of header `name', or `default' if it is absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)
class HTTPConnection:
    """Client side of one HTTP/1.1 connection.

    The object tracks where it is in the request/response cycle (see the
    state table in the module docstring) and raises a subclass of
    ImproperConnectionState when putrequest(), putheader(), endheaders()
    or getresponse() is called out of order.
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1       # connect automatically on the first send()

    def __init__(self, host, port=None):
        self.sock = None
        self.__response = None
        self.__state = _CS_IDLE

        self._set_hostport(host, port)

    def _set_hostport(self, host, port):
        """Store host and port; a "host:port" string supplies the port."""
        if port is None:
            i = host.find(':')
            if i >= 0:
                port = int(host[i+1:])
                host = host[:i]
            else:
                port = self.default_port
        self.host = host
        self.port = port

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.connect((self.host, self.port))

    def close(self):
        """Close the connection to the HTTP server."""
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        self.__state = _CS_IDLE

    def send(self, str):
        """Send `str' to the server.

        Connects first when there is no socket and auto_open is set;
        otherwise raises NotConnected.
        """
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        # send the data to the server. if we get a broken pipe, then close
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
        data = str
        try:
            # send() may accept only part of the data, so keep going until
            # everything has been handed over
            while data:
                sent = self.sock.send(data)
                data = data[sent:]
        except socket.error:
            # sys.exc_info() is used so that no version-specific
            # "except ..., name" / "except ... as name" syntax is needed
            err = sys.exc_info()[1]
            if err.args[0] == errno.EPIPE:      # Broken pipe
                self.close()
            raise

    def putrequest(self, method, url):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.

        Raises CannotSendRequest unless the connection is idle.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # in certain cases, we cannot issue another request on this
        # connection. this occurs when:
        #   1) we are in the process of sending a request. (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is
        #      going to close the connection upon completion.
        #   3) the headers for the previous response have not been read,
        #      thus we cannot determine whether point (2) is true.
        #      (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new
        #       request. We are not allowed to begin fetching the response
        #       to this new request, however, until that prior response is
        #       complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest()

        if not url:
            url = '/'
        request_line = '%s %s %s\r\n' % (method, url, self._http_vsn_str)

        try:
            self.send(request_line)
        except socket.error:
            err = sys.exc_info()[1]
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if err.args[0] != errno.EPIPE or not self.auto_open:
                raise
            # send() closed the connection, which reset the state to idle;
            # put it back so the headers that follow are accepted
            self.__state = _CS_REQ_STARTED
            # try one more time (the socket was closed; this will reopen)
            self.send(request_line)

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            # this header is issued *only* for HTTP/1.1 connections. more
            # specifically, this means it is only issued when the client
            # uses the new HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be issuing this
            # header themselves. we should NOT issue it twice; some web
            # servers (such as Apache) barf when they see two Host: headers
            #
            # the port is part of the header unless it is the default one
            if self.port == self.default_port:
                host_value = self.host
            else:
                host_value = '%s:%s' % (self.host, self.port)
            self.putheader('Host', host_value)

            # note: we are assuming that clients will not attempt to set
            #       these headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then
            #       this code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, value):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Raises CannotSendHeader unless a request has been started.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        header_line = '%s: %s\r\n' % (header, value)
        self.send(header_line)

    def endheaders(self):
        """Indicate that the last header line has been sent to the server."""

        if self.__state == _CS_REQ_STARTED:
            self.__state = _CS_REQ_SENT
        else:
            raise CannotSendHeader()

        self.send('\r\n')

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        `headers' is only read, never modified.  The whole request is
        retried once after a broken pipe when auto_open is set.
        """

        try:
            self._send_request(method, url, body, headers)
        except socket.error:
            err = sys.exc_info()[1]
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if err.args[0] != errno.EPIPE or not self.auto_open:
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _send_request(self, method, url, body, headers):
        """Send request line, headers and (if non-empty) the body."""
        self.putrequest(method, url)

        if body:
            self.putheader('Content-Length', str(len(body)))
        for hdr, value in headers.items():
            self.putheader(hdr, value)
        self.endheaders()

        if body:
            self.send(body)

    def getresponse(self):
        """Get the response from the server.

        Raises ResponseNotReady unless a request has been completely sent
        and any earlier response on this connection has been read.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise,
        # we cannot read this response's header to determine the
        # connection-close behavior)
        #
        # note: if a prior response existed, but was connection-close, then
        # the socket and response were made independent of this
        # HTTPConnection object since a new request requires that we open a
        # whole new connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket
        #                  and response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        response = self.response_class(self.sock)

        response.begin()
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
class FakeSocket:
    """Give an SSL object the parts of the socket interface used here.

    send() and recv() go through the SSL object; any other attribute is
    looked up on the wrapped plain socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        """Return a read-only file object holding everything the peer sends.

        `bufsize' is accepted (and ignored) so that this can be called the
        same way as a real socket's makefile(), e.g. makefile('rb', 0).
        Raises UnimplementedFileMode for any mode other than 'r' or 'rb'.
        """
        # hopefully, never have to write
        if mode != 'r' and mode != 'rb':
            raise UnimplementedFileMode()

        # read until the SSL layer reports an error or returns no data
        pieces = []
        while 1:
            try:
                data = self.__ssl.read()
            except socket.sslerror:
                break
            if not data:
                break
            pieces.append(data)
        return StringIO(''.join(pieces))

    def send(self, stuff, flags = 0):
        """Write `stuff' through the SSL object; `flags' is ignored."""
        return self.__ssl.write(stuff)

    def recv(self, len = 1024, flags = 0):
        """Read up to `len' bytes through the SSL object; `flags' is ignored."""
        return self.__ssl.read(len)

    def __getattr__(self, attr):
        # everything else is delegated to the underlying socket
        return getattr(self.__sock, attr)
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Accept only the keyword arguments key_file and cert_file.

        Any other keyword raises IllegalKeywordArgument.
        """
        for name in x509.keys():
            if name not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        "Connect to a host on a given (SSL) port."

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        secured = socket.ssl(plain, self.key_file, self.cert_file)
        # the rest of the class talks to the SSL object through a
        # socket-like wrapper
        self.sock = FakeSocket(plain, secured)
class HTTP:
    "Compatibility class with httplib.py from 1.5."

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, **x509):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        conn = self._connection_class(host, port)

        # make the connection speak the protocol version this class
        # declares; without this the request would go out with the
        # connection class's own version string
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.endheaders = conn.endheaders

        # we never actually use these for anything, but we keep them here for
        # compatibility with post-1.5.2 CVS.
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def set_debuglevel(self, debuglevel):
        "The class no longer supports the debuglevel."
        pass

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def putheader(self, header, *values):
        "The superclass allows only one value argument."
        # several values become one header with continuation lines
        self._conn.putheader(header, '\r\n\t'.join(values))

    def getreply(self):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        When the status line is malformed the tuple is
        (-1, <the offending line>, None) and getfile() returns a file
        object for the rest of the data.
        """
        try:
            response = self._conn.getresponse()
        except BadStatusLine:
            e = sys.exc_info()[1]
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol
            # error.  only the connection is closed here: self.close()
            # would also throw away the file that was just stored.
            self._conn.close()

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the connection and drop the reference to the reply file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, **x509):
            "Like HTTP.__init__, but key_file/cert_file reach the connection."
            HTTP.__init__(self, host, port)

            # the connection object reads these attributes when it sets up
            # SSL in connect(), so they have to be stored on it as well
            self.key_file = self._conn.key_file = x509.get('key_file')
            self.cert_file = self._conn.cert_file = x509.get('cert_file')
class HTTPException(Exception):
    "Base class of all exceptions defined in this module."
    pass

class NotConnected(HTTPException):
    "send() was called without a socket while auto_open is off."
    pass

class UnknownProtocol(HTTPException):
    "The status line named a protocol version other than HTTP/1.x."
    def __init__(self, version):
        # initialise the base class too, so that args and str() carry the
        # value on every interpreter
        HTTPException.__init__(self, version)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    "The response used a Transfer-Encoding other than chunked."
    pass

class IllegalKeywordArgument(HTTPException):
    "HTTPSConnection was given a keyword other than key_file/cert_file."
    pass

class UnimplementedFileMode(HTTPException):
    "FakeSocket.makefile() was asked for a mode other than 'r' or 'rb'."
    pass

class IncompleteRead(HTTPException):
    "Fewer bytes than expected were available; `partial' holds what was read."
    def __init__(self, partial):
        HTTPException.__init__(self, partial)
        self.partial = partial

class ImproperConnectionState(HTTPException):
    "Base class for calls made in the wrong connection state."
    pass

class CannotSendRequest(ImproperConnectionState):
    "putrequest() was called while the connection was not idle."
    pass

class CannotSendHeader(ImproperConnectionState):
    "putheader()/endheaders() was called with no request in progress."
    pass

class ResponseNotReady(ImproperConnectionState):
    "A response or its headers were requested before they were available."
    pass

class BadStatusLine(HTTPException):
    "The status line could not be parsed; `line' holds the text received."
    def __init__(self, line):
        HTTPException.__init__(self, line)
        self.line = line

# for backwards compatibility
error = HTTPException
743 # snarfed from httplib.py for now...
def test():
    """Test this module.

    The test consists of retrieving and displaying the Python
    home page, along with the error code and error string returned
    by the www.python.org server.

    Usage: [-d]... [host [selector]]
    """

    import sys
    import getopt

    def fetch_and_show(client, host, selector):
        # issue a header-less GET and dump status, headers and body
        client.connect(host)
        client.putrequest('GET', selector)
        client.endheaders()
        status, reason, headers = client.getreply()
        print('status = %s' % (status,))
        print('reason = %s' % (reason,))
        print('')
        if headers:
            for header in headers.headers:
                print(header.strip())
        print('')
        print(client.getfile().read())

    opts, args = getopt.getopt(sys.argv[1:], 'd')

    # every -d on the command line raises the debug level by one
    dl = 0
    for flag, _unused in opts:
        if flag == '-d':
            dl = dl + 1

    host = 'www.python.org'
    selector = '/'
    if len(args) > 0:
        host = args[0]
    if len(args) > 1:
        selector = args[1]

    h = HTTP()
    h.set_debuglevel(dl)
    fetch_and_show(h, host, selector)

    # repeat over SSL when the socket module supports it
    if hasattr(socket, 'ssl'):
        host = 'sourceforge.net'
        hs = HTTPS()
        fetch_and_show(hs, host, selector)
# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test()