This commit was manufactured by cvs2svn to create tag 'r211c1'.
[python/dscho.git] / Lib / httplib.py
blob1e08539e2fece0fd02324fbe35859f15f8ddc4fa
"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection objects go through a number of "states", which define when a
client may legally make another request or fetch the response for a
particular request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
69 import socket
70 import mimetools
72 try:
73 from cStringIO import StringIO
74 except ImportError:
75 from StringIO import StringIO
# Public names exported by "from httplib import *".
__all__ = [
    "HTTP",
    "HTTPResponse",
    "HTTPConnection",
    "HTTPSConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "IllegalKeywordArgument",
    "UnimplementedFileMode",
    "IncompleteRead",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
]
# Default ports, used when the caller does not name one.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder held by HTTPResponse attributes until the headers are parsed.
_UNKNOWN = 'UNKNOWN'

# connection states (see the table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
class HTTPResponse:
    """One HTTP response read from a socket.

    begin() parses the status line and the headers and works out how the
    body is framed; read() then returns body data.  Until begin() has run,
    version, status, reason, chunked, chunk_left, length and will_close
    all hold the _UNKNOWN placeholder and msg is None.
    """

    def __init__(self, sock, debuglevel=0):
        """Wrap `sock' in a binary file object; nothing is read yet."""
        self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel

        self.msg = None                 # header message, set by begin()

        # from the Status-Line of the response
        self.version = _UNKNOWN         # HTTP-Version
        self.status = _UNKNOWN          # Status-Code
        self.reason = _UNKNOWN          # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def begin(self):
        """Read the status line and headers and derive the body framing.

        Does nothing if the headers were already read.  Raises
        BadStatusLine for a malformed status line, UnknownProtocol for an
        unsupported HTTP version and UnknownTransferEncoding for any
        Transfer-Encoding other than "chunked".
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        line = self.fp.readline()
        if self.debuglevel > 0:
            print("reply: " + repr(line))
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # not even two fields: treat it as an HTTP/0.9 reply
                version = "HTTP/0.9"
                status = "200"
                reason = ""
        if version[:5] != 'HTTP/':
            self.close()
            raise BadStatusLine(line)

        # The status code is a three-digit number
        try:
            self.status = status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        self.reason = reason.strip()

        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # HTTP/0.9 has no headers; hand out an empty message
            self.msg = mimetools.Message(StringIO())
            return

        self.msg = mimetools.Message(self.fp, 0)
        if self.debuglevel > 0:
            import sys
            for hdr in self.msg.headers:
                # header lines already carry their own line ending
                sys.stdout.write("header: " + hdr)

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc:
            if tr_enc.lower() != 'chunked':
                raise UnknownTransferEncoding()
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        conn = self.msg.getheader('connection')
        if conn:
            conn = conn.lower()
            # a "Connection: close" will always close the connection. if we
            # don't see that and this is not HTTP/1.1, then the connection
            # will close unless we see a Keep-Alive header.
            self.will_close = conn.find('close') != -1 or \
                              (self.version != 11 and
                               not self.msg.getheader('keep-alive'))
        else:
            # for HTTP/1.1, the connection will always remain open
            # otherwise, it will remain open IFF we see a Keep-Alive header
            self.will_close = self.version != 11 and \
                              not self.msg.getheader('keep-alive')

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == 204 or            # No Content
            status == 304 or            # Not Modified
            100 <= status < 200):       # 1xx codes
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def close(self):
        """Close the underlying file object, if it is still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return true once the response's file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        return self.fp is None

    def read(self, amt=None):
        """Return up to `amt' bytes of the body, or all of it if amt is None.

        Returns '' once the response is closed.  The response closes itself
        as soon as the whole body is known to have been consumed.
        """
        if self.fp is None:
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.will_close:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
            self.close()        # we read everything
            return s

        if self.length is not None and amt > self.length:
            # clip the read to the "end of response"
            amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)

        if self.length is not None:
            # account for what actually arrived, not for what was asked
            self.length = self.length - len(s)
            if not self.length:
                # body exhausted: release the file so isclosed() reports it
                self.close()
        return s

    def _read_chunked(self, amt):
        """Read from a chunked body; helper for read()."""
        chunk_left = self.chunk_left
        parts = []
        while 1:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i]     # strip chunk-extensions
                chunk_left = int(line, 16)
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt = amt - chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while 1:
            line = self.fp.readline()
            # an empty string is EOF: stop instead of spinning forever
            if not line or line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        parts = []
        while amt > 0:
            chunk = self.fp.read(amt)
            if not chunk:
                raise IncompleteRead(''.join(parts))
            parts.append(chunk)
            amt = amt - len(chunk)
        return ''.join(parts)

    def getheader(self, name, default=None):
        """Return header `name', or `default'; needs begin() to have run."""
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)
class HTTPConnection:
    """Client side of a connection to one HTTP server.

    A request is built with putrequest(), putheader() and endheaders(), or
    in one step with request(); getresponse() then returns a
    response_class instance.  The legal ordering of these calls is the
    state machine described in the module docstring, tracked in the
    private __state and __response attributes.
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1       # connect on first send() instead of raising
    debuglevel = 0

    def __init__(self, host, port=None):
        """Record the target; `host' may carry the port as "host:port"."""
        self.sock = None
        self.__response = None
        self.__state = _CS_IDLE

        self._set_hostport(host, port)

    def _set_hostport(self, host, port):
        """Store host and port, splitting "host:port" if port is None."""
        if port is None:
            i = host.find(':')
            if i >= 0:
                port = int(host[i+1:])
                host = host[:i]
            else:
                port = self.default_port
        self.host = host
        self.port = port

    def set_debuglevel(self, level):
        """Set the debug level; anything above 0 prints the traffic."""
        self.debuglevel = level

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.debuglevel > 0:
            print("connect: (%s, %s)" % (self.host, self.port))
        self.sock.connect((self.host, self.port))

    def close(self):
        """Close the connection to the HTTP server."""
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        self.__state = _CS_IDLE

    def _broken_pipe(self):
        """Return true if the exception being handled reports EPIPE."""
        import errno
        import sys
        return sys.exc_info()[1].args[0] == errno.EPIPE

    def send(self, str):
        """Send `str' to the server.

        Connects first if necessary and auto_open is set; otherwise raises
        NotConnected.  socket.error is always propagated; on a broken pipe
        the connection is closed first so that a later send reconnects.
        """
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        # send the data to the server. if we get a broken pipe, then close
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
        if self.debuglevel > 0:
            print("send: " + repr(str))
        data = str
        try:
            # socket.send() may accept only part of the data; keep going
            # until everything has been handed over
            while data:
                sent = self.sock.send(data)
                if sent is None:
                    # socket-like object that reports no count; nothing
                    # more can be inferred, so treat it as complete
                    break
                data = data[sent:]
        except socket.error:
            if self._broken_pipe():
                self.close()
            raise

    def putrequest(self, method, url):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.

        Raises CannotSendRequest unless the connection is idle.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest()

        if not url:
            url = '/'
        request_line = '%s %s %s\r\n' % (method, url, self._http_vsn_str)

        try:
            self.send(request_line)
        except socket.error:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if not self.auto_open or not self._broken_pipe():
                raise
            # send() closed the connection, which reset the state to Idle;
            # restore it so the headers that follow are accepted, then
            # try one more time (the socket was closed; this will reopen)
            self.__state = _CS_REQ_STARTED
            self.send(request_line)

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            # this header is issued *only* for HTTP/1.1 connections. more
            # specifically, this means it is only issued when the client uses
            # the new HTTPConnection() class. backwards-compat clients will
            # be using HTTP/1.0 and those clients may be issuing this header
            # themselves. we should NOT issue it twice; some web servers (such
            # as Apache) barf when they see two Host: headers

            # if we need a non-standard port, include it in the header.
            # "standard" is this class's own default, so that subclasses
            # with another default_port get the right answer too
            if self.port == self.default_port:
                self.putheader('Host', self.host)
            else:
                self.putheader('Host', "%s:%s" % (self.host, self.port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, value):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Raises CannotSendHeader unless a request has been started.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        line = '%s: %s\r\n' % (header, value)
        self.send(line)

    def endheaders(self):
        """Indicate that the last header line has been sent to the server."""

        if self.__state == _CS_REQ_STARTED:
            self.__state = _CS_REQ_SENT
        else:
            raise CannotSendHeader()

        self.send('\r\n')

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        `headers' maps header names to values and is only read.  After a
        broken pipe the whole request is sent once more, provided
        auto_open allows reconnecting.
        """

        try:
            self._send_request(method, url, body, headers)
        except socket.error:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if not self.auto_open or not self._broken_pipe():
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _send_request(self, method, url, body, headers):
        """Emit request line, headers and body; helper for request()."""
        self.putrequest(method, url)

        if body:
            self.putheader('Content-Length', str(len(body)))
        for hdr, value in headers.items():
            self.putheader(hdr, value)
        self.endheaders()

        if body:
            self.send(body)

    def getresponse(self):
        """Get the response from the server.

        Raises ResponseNotReady unless a request has been fully sent and
        any earlier response on this connection has been read to the end.
        """

        # check if a prior response has been completed
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel)
        else:
            response = self.response_class(self.sock)

        response.begin()
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
class FakeSocket:
    """Socket look-alike that routes reads and writes through an SSL object.

    Anything not defined here is looked up on the wrapped real socket.
    """

    def __init__(self, sock, ssl):
        self.__sock = sock
        self.__ssl = ssl

    def makefile(self, mode, bufsize=None):
        """Return a readable file-like object with data from socket.

        This method offers only partial support for the makefile
        interface of a real socket.  It only supports modes 'r' and
        'rb' and the bufsize argument is ignored.

        The returned object contains *all* of the file data
        """
        if mode not in ('r', 'rb'):
            raise UnimplementedFileMode()

        # drain the SSL stream; an SSL error or an empty read ends it
        pieces = []
        while 1:
            try:
                data = self.__ssl.read()
            except socket.sslerror:
                break
            if data == '':
                break
            pieces.append(data)
        return StringIO("".join(pieces))

    def send(self, stuff, flags = 0):
        """Write `stuff' through the SSL object; `flags' is ignored."""
        return self.__ssl.write(stuff)

    def recv(self, len = 1024, flags = 0):
        """Read up to `len' bytes from the SSL object; `flags' is ignored."""
        return self.__ssl.read(len)

    def __getattr__(self, attr):
        # everything else is served by the underlying socket
        return getattr(self.__sock, attr)
class HTTPSConnection(HTTPConnection):
    """HTTPConnection variant that talks to the server over SSL."""

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, **x509):
        """Accept only the keyword arguments key_file and cert_file.

        Any other keyword raises IllegalKeywordArgument.
        """
        for name in x509.keys():
            if name not in ('key_file', 'cert_file'):
                raise IllegalKeywordArgument()
        HTTPConnection.__init__(self, host, port)
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

    def connect(self):
        """Connect to a host on a given (SSL) port."""

        plain = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        plain.connect((self.host, self.port))
        # socket.ssl() is handed the object behind a `_sock' attribute
        # when the socket has one, otherwise the socket itself
        if hasattr(plain, "_sock"):
            raw = plain._sock
        else:
            raw = plain
        secure = socket.ssl(raw, self.key_file, self.cert_file)
        self.sock = FakeSocket(plain, secure)
class HTTP:
    """Compatibility class with httplib.py from 1.5."""

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, **x509):
        """Provide a default host, since the superclass requires one."""

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        conn = self._connection_class(host, port)
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.endheaders = conn.endheaders

        # we never actually use these for anything, but we keep them here for
        # compatibility with post-1.5.2 CVS.
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')

        self.file = None

    def connect(self, host=None, port=None):
        """Accept arguments to set the host/port, since the superclass doesn't."""

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def set_debuglevel(self, debuglevel):
        """Forward the debug level to the wrapped connection."""
        self._conn.set_debuglevel(debuglevel)

    def getfile(self):
        """Provide a getfile, since the superclass' does not use this concept."""
        return self.file

    def putheader(self, header, *values):
        """The superclass allows only one value argument."""
        # several values become one header folded over continuation lines
        folded = '\r\n\t'.join(values)
        self._conn.putheader(header, folded)

    def getreply(self):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (the integer 200 if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        After a bad status line the tuple is (-1, <offending line>, None).
        """
        try:
            response = self._conn.getresponse()
        except BadStatusLine:
            # fetched this way so the clause parses on every Python version
            import sys
            e = sys.exc_info()[1]

            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            self.headers = None
            return -1, e.line, None

        self.file = response.fp
        self.headers = response.msg
        return response.status, response.reason, response.msg

    def close(self):
        """Close the wrapped connection and drop our file reference."""
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, **x509):
            """Like HTTP.__init__, but key_file/cert_file take effect."""
            HTTP.__init__(self, host, port, **x509)
            # HTTPSConnection.connect() reads the credentials from the
            # connection object, so they must be placed there; keeping
            # them only on this wrapper would silently ignore them
            self._conn.key_file = x509.get('key_file')
            self._conn.cert_file = x509.get('cert_file')
class HTTPException(Exception):
    """Base class for every exception defined by this module."""


class NotConnected(HTTPException):
    """send() was called with no socket while auto_open is off."""


class UnknownProtocol(HTTPException):
    """The status line named an HTTP version this module cannot handle."""

    def __init__(self, version):
        # initialise the base class so args and str() carry the value
        HTTPException.__init__(self, version)
        self.version = version


class UnknownTransferEncoding(HTTPException):
    """The response used a Transfer-Encoding other than "chunked"."""


class IllegalKeywordArgument(HTTPException):
    """HTTPSConnection got a keyword other than key_file or cert_file."""


class UnimplementedFileMode(HTTPException):
    """FakeSocket.makefile() was asked for a mode other than 'r' or 'rb'."""


class IncompleteRead(HTTPException):
    """The stream ended early; `partial' holds the data read so far."""

    def __init__(self, partial):
        HTTPException.__init__(self, partial)
        self.partial = partial


class ImproperConnectionState(HTTPException):
    """Base class for calls made in the wrong connection state."""


class CannotSendRequest(ImproperConnectionState):
    """putrequest() was called while the connection was not idle."""


class CannotSendHeader(ImproperConnectionState):
    """putheader()/endheaders() was called with no request in progress."""


class ResponseNotReady(ImproperConnectionState):
    """A response was asked for before it could legally be read."""


class BadStatusLine(HTTPException):
    """The status line was malformed; `line' holds the offending text."""

    def __init__(self, line):
        HTTPException.__init__(self, line)
        self.line = line


# for backwards compatibility
error = HTTPException
#
# snarfed from httplib.py for now...
#
def test():
    """Test this module.

    The test consists of retrieving and displaying the Python
    home page, along with the error code and error string returned
    by the www.python.org server.

    Usage: [-d]... [host [selector]]; each -d raises the debug level.
    When the socket module has SSL support, a fixed HTTPS page is
    fetched as well.
    """

    import sys
    import getopt

    def fetch_and_show(conn, host, selector):
        # one GET through the 1.5-style interface, dumped to stdout
        conn.connect(host)
        conn.putrequest('GET', selector)
        conn.endheaders()
        status, reason, headers = conn.getreply()
        print('status = %s' % (status,))
        print('reason = %s' % (reason,))
        print('')
        if headers:
            for header in headers.headers:
                print(header.strip())
        print('')
        print(conn.getfile().read())

    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = len([o for o, a in opts if o == '-d'])

    host = 'www.python.org'
    selector = '/'
    if args:
        host = args[0]
    if len(args) > 1:
        selector = args[1]

    h = HTTP()
    h.set_debuglevel(dl)
    fetch_and_show(h, host, selector)

    if hasattr(socket, 'ssl'):
        hs = HTTPS()
        fetch_and_show(hs, 'sourceforge.net', '/projects/python')


if __name__ == '__main__':
    test()