This commit was manufactured by cvs2svn to create tag 'r201'.
[python/dscho.git] / Lib / httplib.py
blob2b32a1bb57ef67256818cf3f794bfe7f4521fcca
1 """HTTP/1.1 client library
3 <intro stuff goes here>
4 <other stuff, too>
HTTPConnection objects go through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67 """
import errno
import mimetools
import socket
import string

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
78 HTTP_PORT = 80
79 HTTPS_PORT = 443
81 _UNKNOWN = 'UNKNOWN'
83 # connection states
84 _CS_IDLE = 'Idle'
85 _CS_REQ_STARTED = 'Request-started'
86 _CS_REQ_SENT = 'Request-sent'
89 class HTTPResponse:
90 def __init__(self, sock, debuglevel=0):
91 self.fp = sock.makefile('rb', 0)
92 self.debuglevel = debuglevel
94 self.msg = None
96 # from the Status-Line of the response
97 self.version = _UNKNOWN # HTTP-Version
98 self.status = _UNKNOWN # Status-Code
99 self.reason = _UNKNOWN # Reason-Phrase
101 self.chunked = _UNKNOWN # is "chunked" being used?
102 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
103 self.length = _UNKNOWN # number of bytes left in response
104 self.will_close = _UNKNOWN # conn will close at end of response
106 def begin(self):
107 if self.msg is not None:
108 # we've already started reading the response
109 return
111 line = self.fp.readline()
112 if self.debuglevel > 0:
113 print "reply:", repr(line)
114 try:
115 [version, status, reason] = string.split(line, None, 2)
116 except ValueError:
117 try:
118 [version, status] = string.split(line, None, 1)
119 reason = ""
120 except ValueError:
121 version = "HTTP/0.9"
122 status = "200"
123 reason = ""
124 if version[:5] != 'HTTP/':
125 self.close()
126 raise BadStatusLine(line)
128 self.status = status = int(status)
129 self.reason = string.strip(reason)
131 if version == 'HTTP/1.0':
132 self.version = 10
133 elif version.startswith('HTTP/1.'):
134 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
135 elif version == 'HTTP/0.9':
136 self.version = 9
137 else:
138 raise UnknownProtocol(version)
140 if self.version == 9:
141 self.msg = mimetools.Message(StringIO())
142 return
144 self.msg = mimetools.Message(self.fp, 0)
145 if self.debuglevel > 0:
146 for hdr in self.msg.headers:
147 print "header:", hdr,
149 # don't let the msg keep an fp
150 self.msg.fp = None
152 # are we using the chunked-style of transfer encoding?
153 tr_enc = self.msg.getheader('transfer-encoding')
154 if tr_enc:
155 if string.lower(tr_enc) != 'chunked':
156 raise UnknownTransferEncoding()
157 self.chunked = 1
158 self.chunk_left = None
159 else:
160 self.chunked = 0
162 # will the connection close at the end of the response?
163 conn = self.msg.getheader('connection')
164 if conn:
165 conn = string.lower(conn)
166 # a "Connection: close" will always close the connection. if we
167 # don't see that and this is not HTTP/1.1, then the connection will
168 # close unless we see a Keep-Alive header.
169 self.will_close = string.find(conn, 'close') != -1 or \
170 ( self.version != 11 and \
171 not self.msg.getheader('keep-alive') )
172 else:
173 # for HTTP/1.1, the connection will always remain open
174 # otherwise, it will remain open IFF we see a Keep-Alive header
175 self.will_close = self.version != 11 and \
176 not self.msg.getheader('keep-alive')
178 # do we have a Content-Length?
179 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
180 length = self.msg.getheader('content-length')
181 if length and not self.chunked:
182 try:
183 self.length = int(length)
184 except ValueError:
185 self.length = None
186 else:
187 self.length = None
189 # does the body have a fixed length? (of zero)
190 if (status == 204 or # No Content
191 status == 304 or # Not Modified
192 100 <= status < 200): # 1xx codes
193 self.length = 0
195 # if the connection remains open, and we aren't using chunked, and
196 # a content-length was not provided, then assume that the connection
197 # WILL close.
198 if not self.will_close and \
199 not self.chunked and \
200 self.length is None:
201 self.will_close = 1
203 def close(self):
204 if self.fp:
205 self.fp.close()
206 self.fp = None
208 def isclosed(self):
209 # NOTE: it is possible that we will not ever call self.close(). This
210 # case occurs when will_close is TRUE, length is None, and we
211 # read up to the last byte, but NOT past it.
213 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
214 # called, meaning self.isclosed() is meaningful.
215 return self.fp is None
217 def read(self, amt=None):
218 if self.fp is None:
219 return ''
221 if self.chunked:
222 chunk_left = self.chunk_left
223 value = ''
224 while 1:
225 if chunk_left is None:
226 line = self.fp.readline()
227 i = string.find(line, ';')
228 if i >= 0:
229 line = line[:i] # strip chunk-extensions
230 chunk_left = string.atoi(line, 16)
231 if chunk_left == 0:
232 break
233 if amt is None:
234 value = value + self._safe_read(chunk_left)
235 elif amt < chunk_left:
236 value = value + self._safe_read(amt)
237 self.chunk_left = chunk_left - amt
238 return value
239 elif amt == chunk_left:
240 value = value + self._safe_read(amt)
241 self._safe_read(2) # toss the CRLF at the end of the chunk
242 self.chunk_left = None
243 return value
244 else:
245 value = value + self._safe_read(chunk_left)
246 amt = amt - chunk_left
248 # we read the whole chunk, get another
249 self._safe_read(2) # toss the CRLF at the end of the chunk
250 chunk_left = None
252 # read and discard trailer up to the CRLF terminator
253 ### note: we shouldn't have any trailers!
254 while 1:
255 line = self.fp.readline()
256 if line == '\r\n':
257 break
259 # we read everything; close the "file"
260 self.close()
262 return value
264 elif amt is None:
265 # unbounded read
266 if self.will_close:
267 s = self.fp.read()
268 else:
269 s = self._safe_read(self.length)
270 self.close() # we read everything
271 return s
273 if self.length is not None:
274 if amt > self.length:
275 # clip the read to the "end of response"
276 amt = self.length
277 self.length = self.length - amt
279 # we do not use _safe_read() here because this may be a .will_close
280 # connection, and the user is reading more bytes than will be provided
281 # (for example, reading in 1k chunks)
282 s = self.fp.read(amt)
284 return s
286 def _safe_read(self, amt):
287 """Read the number of bytes requested, compensating for partial reads.
289 Normally, we have a blocking socket, but a read() can be interrupted
290 by a signal (resulting in a partial read).
292 Note that we cannot distinguish between EOF and an interrupt when zero
293 bytes have been read. IncompleteRead() will be raised in this
294 situation.
296 This function should be used when <amt> bytes "should" be present for
297 reading. If the bytes are truly not available (due to EOF), then the
298 IncompleteRead exception can be used to detect the problem.
300 s = ''
301 while amt > 0:
302 chunk = self.fp.read(amt)
303 if not chunk:
304 raise IncompleteRead(s)
305 s = s + chunk
306 amt = amt - len(chunk)
307 return s
309 def getheader(self, name, default=None):
310 if self.msg is None:
311 raise ResponseNotReady()
312 return self.msg.getheader(name, default)
315 class HTTPConnection:
317 _http_vsn = 11
318 _http_vsn_str = 'HTTP/1.1'
320 response_class = HTTPResponse
321 default_port = HTTP_PORT
322 auto_open = 1
323 debuglevel = 0
325 def __init__(self, host, port=None):
326 self.sock = None
327 self.__response = None
328 self.__state = _CS_IDLE
330 self._set_hostport(host, port)
332 def _set_hostport(self, host, port):
333 if port is None:
334 i = string.find(host, ':')
335 if i >= 0:
336 port = int(host[i+1:])
337 host = host[:i]
338 else:
339 port = self.default_port
340 self.host = host
341 self.port = port
343 def set_debuglevel(self, level):
344 self.debuglevel = level
346 def connect(self):
347 """Connect to the host and port specified in __init__."""
348 self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
349 if self.debuglevel > 0:
350 print "connect: (%s, %s)" % (self.host, self.port)
351 self.sock.connect((self.host, self.port))
353 def close(self):
354 """Close the connection to the HTTP server."""
355 if self.sock:
356 self.sock.close() # close it manually... there may be other refs
357 self.sock = None
358 if self.__response:
359 self.__response.close()
360 self.__response = None
361 self.__state = _CS_IDLE
363 def send(self, str):
364 """Send `str' to the server."""
365 if self.sock is None:
366 if self.auto_open:
367 self.connect()
368 else:
369 raise NotConnected()
371 # send the data to the server. if we get a broken pipe, then close
372 # the socket. we want to reconnect when somebody tries to send again.
374 # NOTE: we DO propagate the error, though, because we cannot simply
375 # ignore the error... the caller will know if they can retry.
376 if self.debuglevel > 0:
377 print "send:", repr(str)
378 try:
379 self.sock.send(str)
380 except socket.error, v:
381 if v[0] == 32: # Broken pipe
382 self.close()
383 raise
385 def putrequest(self, method, url):
386 """Send a request to the server.
388 `method' specifies an HTTP request method, e.g. 'GET'.
389 `url' specifies the object being requested, e.g. '/index.html'.
392 # check if a prior response has been completed
393 if self.__response and self.__response.isclosed():
394 self.__response = None
397 # in certain cases, we cannot issue another request on this connection.
398 # this occurs when:
399 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
400 # 2) a response to a previous request has signalled that it is going
401 # to close the connection upon completion.
402 # 3) the headers for the previous response have not been read, thus
403 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
405 # if there is no prior response, then we can request at will.
407 # if point (2) is true, then we will have passed the socket to the
408 # response (effectively meaning, "there is no prior response"), and
409 # will open a new one when a new request is made.
411 # Note: if a prior response exists, then we *can* start a new request.
412 # We are not allowed to begin fetching the response to this new
413 # request, however, until that prior response is complete.
415 if self.__state == _CS_IDLE:
416 self.__state = _CS_REQ_STARTED
417 else:
418 raise CannotSendRequest()
420 if not url:
421 url = '/'
422 str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
424 try:
425 self.send(str)
426 except socket.error, v:
427 # trap 'Broken pipe' if we're allowed to automatically reconnect
428 if v[0] != 32 or not self.auto_open:
429 raise
430 # try one more time (the socket was closed; this will reopen)
431 self.send(str)
433 if self._http_vsn == 11:
434 # Issue some standard headers for better HTTP/1.1 compliance
436 # this header is issued *only* for HTTP/1.1 connections. more
437 # specifically, this means it is only issued when the client uses
438 # the new HTTPConnection() class. backwards-compat clients will
439 # be using HTTP/1.0 and those clients may be issuing this header
440 # themselves. we should NOT issue it twice; some web servers (such
441 # as Apache) barf when they see two Host: headers
442 self.putheader('Host', self.host)
444 # note: we are assuming that clients will not attempt to set these
445 # headers since *this* library must deal with the
446 # consequences. this also means that when the supporting
447 # libraries are updated to recognize other forms, then this
448 # code should be changed (removed or updated).
450 # we only want a Content-Encoding of "identity" since we don't
451 # support encodings such as x-gzip or x-deflate.
452 self.putheader('Accept-Encoding', 'identity')
454 # we can accept "chunked" Transfer-Encodings, but no others
455 # NOTE: no TE header implies *only* "chunked"
456 #self.putheader('TE', 'chunked')
458 # if TE is supplied in the header, then it must appear in a
459 # Connection header.
460 #self.putheader('Connection', 'TE')
462 else:
463 # For HTTP/1.0, the server will assume "not chunked"
464 pass
466 def putheader(self, header, value):
467 """Send a request header line to the server.
469 For example: h.putheader('Accept', 'text/html')
471 if self.__state != _CS_REQ_STARTED:
472 raise CannotSendHeader()
474 str = '%s: %s\r\n' % (header, value)
475 self.send(str)
477 def endheaders(self):
478 """Indicate that the last header line has been sent to the server."""
480 if self.__state == _CS_REQ_STARTED:
481 self.__state = _CS_REQ_SENT
482 else:
483 raise CannotSendHeader()
485 self.send('\r\n')
487 def request(self, method, url, body=None, headers={}):
488 """Send a complete request to the server."""
490 try:
491 self._send_request(method, url, body, headers)
492 except socket.error, v:
493 # trap 'Broken pipe' if we're allowed to automatically reconnect
494 if v[0] != 32 or not self.auto_open:
495 raise
496 # try one more time
497 self._send_request(method, url, body, headers)
499 def _send_request(self, method, url, body, headers):
500 self.putrequest(method, url)
502 if body:
503 self.putheader('Content-Length', str(len(body)))
504 for hdr, value in headers.items():
505 self.putheader(hdr, value)
506 self.endheaders()
508 if body:
509 self.send(body)
511 def getresponse(self):
512 "Get the response from the server."
514 # check if a prior response has been completed
515 if self.__response and self.__response.isclosed():
516 self.__response = None
519 # if a prior response exists, then it must be completed (otherwise, we
520 # cannot read this response's header to determine the connection-close
521 # behavior)
523 # note: if a prior response existed, but was connection-close, then the
524 # socket and response were made independent of this HTTPConnection
525 # object since a new request requires that we open a whole new
526 # connection
528 # this means the prior response had one of two states:
529 # 1) will_close: this connection was reset and the prior socket and
530 # response operate independently
531 # 2) persistent: the response was retained and we await its
532 # isclosed() status to become true.
534 if self.__state != _CS_REQ_SENT or self.__response:
535 raise ResponseNotReady()
537 if self.debuglevel > 0:
538 response = self.response_class(self.sock, self.debuglevel)
539 else:
540 response = self.response_class(self.sock)
542 response.begin()
543 self.__state = _CS_IDLE
545 if response.will_close:
546 # this effectively passes the connection to the response
547 self.close()
548 else:
549 # remember this, so we can tell when it is complete
550 self.__response = response
552 return response
class FakeSocket:
    """Give an SSL object the parts of the socket interface httplib uses.

    send() and recv() go through the SSL object; makefile() is emulated;
    every other attribute is looked up on the wrapped plain socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        """Return a readable file-like object with data from socket.

        This method offers only partial support for the makefile
        interface of a real socket.  It only supports modes 'r' and
        'rb' and the bufsize argument is ignored.

        The returned object contains *all* of the file data: the SSL
        object is read until it raises socket.sslerror or returns an
        empty string.

        Raises UnimplementedFileMode for any other mode.
        """
        if mode != 'r' and mode != 'rb':
            raise UnimplementedFileMode()

        pieces = []
        while 1:
            try:
                data = self.__ssl.read()
            except socket.sslerror:
                break
            if not data:
                # an empty read means EOF; looping on it would never end
                break
            pieces.append(data)
        return StringIO(''.join(pieces))

    def send(self, stuff, flags = 0):
        """Write `stuff' to the SSL object; `flags' is ignored."""
        return self.__ssl.write(stuff)

    def recv(self, len = 1024, flags = 0):
        """Read up to `len' bytes from the SSL object; `flags' is ignored."""
        return self.__ssl.read(len)

    def __getattr__(self, attr):
        # anything not defined here is delegated to the plain socket
        return getattr(self.__sock, attr)
class HTTPSConnection(HTTPConnection):
    "This class allows communication via SSL."

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Accept only the keyword arguments `key_file' and `cert_file'.

        Any other keyword raises IllegalKeywordArgument. Both values
        default to None.
        """
        for name in x509.keys():
            if name not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        "Connect to a host on a given (SSL) port."

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        # socket.ssl() is handed the `_sock' attribute when the socket
        # object has one, otherwise the socket object itself
        if hasattr(plain, "_sock"):
            raw = plain._sock
        else:
            raw = plain
        secure = socket.ssl(raw, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, secure)
623 class HTTP:
624 "Compatibility class with httplib.py from 1.5."
626 _http_vsn = 10
627 _http_vsn_str = 'HTTP/1.0'
629 debuglevel = 0
631 _connection_class = HTTPConnection
633 def __init__(self, host='', port=None, **x509):
634 "Provide a default host, since the superclass requires one."
636 # some joker passed 0 explicitly, meaning default port
637 if port == 0:
638 port = None
640 # Note that we may pass an empty string as the host; this will throw
641 # an error when we attempt to connect. Presumably, the client code
642 # will call connect before then, with a proper host.
643 self._conn = self._connection_class(host, port)
644 # set up delegation to flesh out interface
645 self.send = self._conn.send
646 self.putrequest = self._conn.putrequest
647 self.endheaders = self._conn.endheaders
648 self._conn._http_vsn = self._http_vsn
649 self._conn._http_vsn_str = self._http_vsn_str
651 # we never actually use these for anything, but we keep them here for
652 # compatibility with post-1.5.2 CVS.
653 self.key_file = x509.get('key_file')
654 self.cert_file = x509.get('cert_file')
656 self.file = None
658 def connect(self, host=None, port=None):
659 "Accept arguments to set the host/port, since the superclass doesn't."
661 if host is not None:
662 self._conn._set_hostport(host, port)
663 self._conn.connect()
665 def set_debuglevel(self, debuglevel):
666 self._conn.set_debuglevel(debuglevel)
668 def getfile(self):
669 "Provide a getfile, since the superclass' does not use this concept."
670 return self.file
672 def putheader(self, header, *values):
673 "The superclass allows only one value argument."
674 self._conn.putheader(header,
675 string.joinfields(values, '\r\n\t'))
677 def getreply(self):
678 """Compat definition since superclass does not define it.
680 Returns a tuple consisting of:
681 - server status code (e.g. '200' if all goes well)
682 - server "reason" corresponding to status code
683 - any RFC822 headers in the response from the server
685 try:
686 response = self._conn.getresponse()
687 except BadStatusLine, e:
688 ### hmm. if getresponse() ever closes the socket on a bad request,
689 ### then we are going to have problems with self.sock
691 ### should we keep this behavior? do people use it?
692 # keep the socket open (as a file), and return it
693 self.file = self._conn.sock.makefile('rb', 0)
695 # close our socket -- we want to restart after any protocol error
696 self.close()
698 self.headers = None
699 return -1, e.line, None
701 self.headers = response.msg
702 self.file = response.fp
703 return response.status, response.reason, response.msg
705 def close(self):
706 self._conn.close()
708 # note that self.file == response.fp, which gets closed by the
709 # superclass. just clear the object ref here.
710 ### hmm. messy. if status==-1, then self.file is owned by us.
711 ### well... we aren't explicitly closing, but losing this ref will
712 ### do it
713 self.file = None
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.

        Only defined when the socket module provides `ssl'.
        """

        # the only difference from HTTP: the connection class that is wrapped
        _connection_class = HTTPSConnection
class HTTPException(Exception):
    """Base class of every exception raised by this module."""
    pass

class NotConnected(HTTPException):
    """send() was called without a socket while auto_open is off."""
    pass

class UnknownProtocol(HTTPException):
    """The status line named an HTTP version this module does not handle."""
    def __init__(self, version):
        # set args as well, so str() and tracebacks show the value
        self.args = (version,)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    """The response used a Transfer-Encoding other than "chunked"."""
    pass

class IllegalKeywordArgument(HTTPException):
    """HTTPSConnection got a keyword other than key_file / cert_file."""
    pass

class UnimplementedFileMode(HTTPException):
    """FakeSocket.makefile() was asked for a mode other than 'r' / 'rb'."""
    pass

class IncompleteRead(HTTPException):
    """EOF was hit before the expected number of bytes arrived.

    `partial' holds the data that was read before the EOF.
    """
    def __init__(self, partial):
        self.args = (partial,)
        self.partial = partial

class ImproperConnectionState(HTTPException):
    """A call was made that is illegal in the current connection state."""
    pass

class CannotSendRequest(ImproperConnectionState):
    """putrequest() was called while the connection was not idle."""
    pass

class CannotSendHeader(ImproperConnectionState):
    """putheader()/endheaders() was called with no request in progress."""
    pass

class ResponseNotReady(ImproperConnectionState):
    """A response (or its headers) was requested before it was available."""
    pass

class BadStatusLine(HTTPException):
    """The server's status line could not be parsed; `line' holds it."""
    def __init__(self, line):
        self.args = (line,)
        self.line = line

# for backwards compatibility
error = HTTPException
771 # snarfed from httplib.py for now...
773 def test():
774 """Test this module.
776 The test consists of retrieving and displaying the Python
777 home page, along with the error code and error string returned
778 by the www.python.org server.
781 import sys
782 import getopt
783 opts, args = getopt.getopt(sys.argv[1:], 'd')
784 dl = 0
785 for o, a in opts:
786 if o == '-d': dl = dl + 1
787 host = 'www.python.org'
788 selector = '/'
789 if args[0:]: host = args[0]
790 if args[1:]: selector = args[1]
791 h = HTTP()
792 h.set_debuglevel(dl)
793 h.connect(host)
794 h.putrequest('GET', selector)
795 h.endheaders()
796 status, reason, headers = h.getreply()
797 print 'status =', status
798 print 'reason =', reason
799 print
800 if headers:
801 for header in headers.headers: print string.strip(header)
802 print
803 print h.getfile().read()
805 if hasattr(socket, 'ssl'):
806 host = 'sourceforge.net'
807 hs = HTTPS()
808 hs.connect(host)
809 hs.putrequest('GET', selector)
810 hs.endheaders()
811 status, reason, headers = hs.getreply()
812 print 'status =', status
813 print 'reason =', reason
814 print
815 if headers:
816 for header in headers.headers: print string.strip(header)
817 print
818 print hs.getfile().read()
821 if __name__ == '__main__':
822 test()