move sections
[python/dscho.git] / Lib / httplib.py
blobd66a9fcd05b184b0565c3f56948d1f80ee31ad93
1 """HTTP/1.1 client library
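
This module implements the client side of the HTTP protocol.
HTTPConnection sends requests over a socket and HTTPResponse parses the
status line, headers and body that the server sends back.
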
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67 """
69 from array import array
70 import socket
71 from sys import py3kwarning
72 from urlparse import urlsplit
73 import warnings
74 with warnings.catch_warnings():
75 if py3kwarning:
76 warnings.filterwarnings("ignore", ".*mimetools has been removed",
77 DeprecationWarning)
78 import mimetools
80 try:
81 from cStringIO import StringIO
82 except ImportError:
83 from StringIO import StringIO
# Names exported by a star-import of this module.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]
# Default TCP ports for plain and TLS-wrapped HTTP.
HTTP_PORT = 80
HTTPS_PORT = 443

# Sentinel for response attributes that have not been parsed yet.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names.
# Every status constant defined above has an entry here, so looking up
# responses[<CONSTANT>] never raises KeyError.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
class HTTPMessage(mimetools.Message):
    """Header container for an HTTP message.

    Extends mimetools.Message with a readheaders() that combines repeated
    header fields into a single comma-separated value in self.dict.
    """

    def addheader(self, key, value):
        """Add header for field key handling repeats."""
        prev = self.dict.get(key)
        if prev is None:
            self.dict[key] = value
        else:
            # A repeated field: append to the value already stored.
            combined = ", ".join((prev, value))
            self.dict[key] = combined

    def addcontinue(self, key, more):
        """Add more field data from a continuation line."""
        prev = self.dict[key]
        self.dict[key] = prev + "\n " + more

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = hlist = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Pick a way to push a wrongly consumed line back: prefer the file
        # object's own unread(), otherwise remember offsets via tell().
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    # tell() failed: stop trying to seek from here on.
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                hlist.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                hlist.append(line)
                # Everything after "<name>:" is the field value.
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
class HTTPResponse:
    """One HTTP response read from a socket.

    begin() parses the status line and headers; read() returns the body,
    handling both Content-Length delimited and chunked transfer encodings.
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
        """Wrap `sock` in a file object; nothing is read until begin().

        `method` is the request method; it is consulted later because a
        HEAD response never carries a body.
        """
        if buffering:
            # The caller won't be using any sock.recv() calls, so buffering
            # is fine and recommended for performance.
            self.fp = sock.makefile('rb')
        else:
            # The buffer size is specified as zero, because the headers of
            # the response are read with readline().  If the reads were
            # buffered the readline() calls could consume some of the
            # response, which may be read via a recv() on the underlying
            # socket.
            self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        # Parsed headers; None until begin() has run.
        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN # HTTP-Version
        self.status = _UNKNOWN  # Status-Code
        self.reason = _UNKNOWN  # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def _read_status(self):
        """Read one status line and return (version, status, reason).

        `status` is returned as an int.  Raises BadStatusLine on EOF, on a
        status code that is not an integer in 100..999, and (in strict mode
        only) on a line that does not start with 'HTTP/'.
        """
        # Initialize with Simple-Response defaults
        line = self.fp.readline()
        if self.debuglevel > 0:
            print "reply:", repr(line)
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                # No reason phrase on the line.
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith('HTTP/'):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # assume it's a Simple-Response from an 0.9 server
                # The line just read is body data, so it is handed to
                # LineAndFileWrapper together with the file object.
                self.fp = LineAndFileWrapper(line, self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and set up body-reading state.

        Sets status, reason, version, msg, chunked, chunk_left, length and
        will_close.  Does nothing if the headers were already read.
        Raises UnknownProtocol for an unrecognised HTTP version.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print "header:", skip

        self.status = status
        self.reason = reason.strip()
        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # A 0.9 response has no headers: the body runs to EOF.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(StringIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print "header:", hdr,

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def _check_close(self):
        """Return True if the connection will close after this response."""
        conn = self.msg.getheader('connection')
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            # NOTE(review): this lookup repeats the one made just above.
            conn = self.msg.getheader('connection')
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader('keep-alive'):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader('proxy-connection')
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the underlying file object, if still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return True once the response's file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Read and return up to `amt` bytes of the body (all if None).

        Returns '' once the response is closed or for a HEAD request.
        The response closes itself when the body has been fully consumed.
        """
        if self.fp is None:
            return ''

        if self._method == 'HEAD':
            self.close()
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
                self.length = 0
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if self.length is not None:
            self.length -= len(s)
            if not self.length:
                self.close()
        return s

    def _read_chunked(self, amt):
        """Read up to `amt` bytes (all if None) of a chunked body.

        self.chunk_left carries the unread remainder of the current chunk
        between calls.  Raises IncompleteRead if a chunk-size line cannot
        be parsed as hexadecimal.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        value = []
        while True:
            if chunk_left is None:
                # Start of a new chunk: parse its hexadecimal size line.
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(''.join(value))
                if chunk_left == 0:
                    # Zero-size chunk marks the end of the body.
                    break
            if amt is None:
                value.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                value.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(value)
            elif amt == chunk_left:
                value.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(value)
            else:
                value.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(value)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
        # return less than x bytes unless EOF is encountered.  It now handles
        # signal interruptions (socket.error EINTR) internally.  This code
        # never caught that exception anyways.  It seems largely pointless.
        # self.fp.read(amt) will work fine.
        s = []
        while amt > 0:
            # Never ask for more than MAXAMOUNT bytes in one read() call.
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(''.join(s), amt)
            s.append(chunk)
            amt -= len(chunk)
        return ''.join(s)

    def getheader(self, name, default=None):
        """Return the value of header `name`, or `default` if absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.items()
class HTTPConnection:
    """Client side of one HTTP connection.

    Requests are built with putrequest() / putheader() / endheaders() (or
    request() in one call) and the reply is fetched with getresponse().
    The legal call order is enforced through the private __state and
    __response attributes.
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1
    debuglevel = 0
    strict = 0

    def __init__(self, host, port=None, strict=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        """Record the target; the socket is only opened by connect().

        `host` may carry a ":port" suffix when `port` is None.  `strict`
        overrides the class-level default only when it is not None.
        """
        self.timeout = timeout
        self.source_address = source_address
        self.sock = None
        self._buffer = []
        self.__response = None
        self.__state = _CS_IDLE
        self._method = None
        self._tunnel_host = None
        self._tunnel_port = None
        self._tunnel_headers = {}

        self._set_hostport(host, port)
        if strict is not None:
            self.strict = strict

    def set_tunnel(self, host, port=None, headers=None):
        """ Sets up the host and the port for the HTTP CONNECT Tunnelling.

        The headers argument should be a mapping of extra HTTP headers
        to send with the CONNECT request.
        """
        self._tunnel_host = host
        self._tunnel_port = port
        if headers:
            self._tunnel_headers = headers
        else:
            # Use a fresh dict rather than clear(): the current mapping may
            # be the caller's own object from an earlier set_tunnel() call,
            # and emptying it would silently clobber their data.
            self._tunnel_headers = {}

    def _set_hostport(self, host, port):
        """Split "host[:port]" and store the result in self.host/self.port.

        When `port` is None it is taken from the host string, falling back
        to default_port if none (or an empty one, as in "host:") is given,
        and the brackets around an IPv6 literal are stripped.  Raises
        InvalidURL for a non-numeric port.
        """
        if port is None:
            i = host.rfind(':')
            j = host.rfind(']')         # ipv6 addresses have [...]
            if i > j:
                port_text = host[i+1:]
                if port_text == "":
                    # "host:" -- an empty port means the default port,
                    # so http://foo.com:/ behaves like http://foo.com/
                    port = self.default_port
                else:
                    try:
                        port = int(port_text)
                    except ValueError:
                        raise InvalidURL("nonnumeric port: '%s'" % port_text)
                host = host[:i]
            else:
                port = self.default_port
            if host and host[0] == '[' and host[-1] == ']':
                host = host[1:-1]
        self.host = host
        self.port = port

    def set_debuglevel(self, level):
        """Set the debug output level; values above 0 print traffic."""
        self.debuglevel = level

    def _tunnel(self):
        """Issue a CONNECT request on the open socket and skip its headers.

        Raises socket.error if the proxy does not answer with status 200.
        """
        # NOTE(review): this overwrites self.host/self.port (which connect()
        # uses) with the tunnel target.
        self._set_hostport(self._tunnel_host, self._tunnel_port)
        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
        for header, value in self._tunnel_headers.iteritems():
            self.send("%s: %s\r\n" % (header, value))
        self.send("\r\n")
        response = self.response_class(self.sock, strict = self.strict,
                                       method = self._method)
        (version, code, message) = response._read_status()

        if code != 200:
            self.close()
            raise socket.error("Tunnel connection failed: %d %s" % (code,
                                                                    message.strip()))
        while True:
            line = response.fp.readline()
            if not line:
                # EOF: the proxy closed the connection before the blank
                # line.  readline() would keep returning '' forever, so
                # stop instead of spinning.
                break
            if line in ('\r\n', '\n'):
                # blank line (CRLF or bare LF) ends the proxy's headers
                break

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = socket.create_connection((self.host,self.port),
                                             self.timeout, self.source_address)

        if self._tunnel_host:
            self._tunnel()

    def close(self):
        """Close the connection to the HTTP server."""
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        self.__state = _CS_IDLE

    def send(self, str):
        """Send `str' to the server.

        `str` is either a string or an object with a read() method, which
        is sent in blocks.  Connects first when auto_open is set; otherwise
        raises NotConnected.  Socket errors are propagated.
        """
        # NOTE: the parameter name shadows the builtin but is part of the
        # public signature, so it is kept.
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        # send the data to the server. if we get a broken pipe, then close
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
        if self.debuglevel > 0:
            print("send: " + repr(str))
        try:
            blocksize = 8192
            if hasattr(str, 'read') and not isinstance(str, array):
                if self.debuglevel > 0:
                    print("sendIng a read()able")
                data = str.read(blocksize)
                while data:
                    self.sock.sendall(data)
                    data = str.read(blocksize)
            else:
                self.sock.sendall(str)
        except socket.error as v:
            # args may be empty, so check before indexing
            if v.args and v.args[0] == 32:      # Broken pipe
                self.close()
            raise

    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        self._buffer.append(s)

    def _send_output(self, message_body=None):
        """Send the currently buffered request and clear the buffer.

        Appends an extra \\r\\n to the buffer.
        A message_body may be specified, to be appended to the request.
        """
        self._buffer.extend(("", ""))
        msg = "\r\n".join(self._buffer)
        del self._buffer[:]
        # If msg and message_body are sent in a single send() call,
        # it will avoid performance problems caused by the interaction
        # between delayed ack and the Nagle algorithm.
        if isinstance(message_body, str):
            msg += message_body
            message_body = None
        self.send(msg)
        if message_body is not None:
            # message_body was not a string (i.e. it is a file) and
            # we must run the risk of Nagle
            self.send(message_body)

    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header

        Raises CannotSendRequest unless the connection is idle.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None


        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest()

        # Save the method we use, we need it later in the response phase
        self._method = method
        if not url:
            url = '/'
        request_line = '%s %s %s' % (method, url, self._http_vsn_str)

        self._output(request_line)

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            if not skip_host:
                # this header is issued *only* for HTTP/1.1
                # connections. more specifically, this means it is
                # only issued when the client uses the new
                # HTTPConnection() class. backwards-compat clients
                # will be using HTTP/1.0 and those clients may be
                # issuing this header themselves. we should NOT issue
                # it twice; some web servers (such as Apache) barf
                # when they see two Host: headers

                # If we need a non-standard port, include it in the
                # header.  If the request is going through a proxy,
                # use the host of the actual URL, not the host of the
                # proxy.

                netloc = ''
                if url.startswith('http'):
                    netloc = urlsplit(url)[1]

                if netloc:
                    try:
                        netloc_enc = netloc.encode("ascii")
                    except UnicodeEncodeError:
                        netloc_enc = netloc.encode("idna")
                    self.putheader('Host', netloc_enc)
                else:
                    try:
                        host_enc = self.host.encode("ascii")
                    except UnicodeEncodeError:
                        host_enc = self.host.encode("idna")
                    # _set_hostport() may have stripped the brackets from an
                    # IPv6 literal; put them back so the address's colons are
                    # not mistaken for the port separator.
                    if ':' in host_enc:
                        host_enc = "[" + host_enc + "]"
                    if self.port == self.default_port:
                        self.putheader('Host', host_enc)
                    else:
                        self.putheader('Host', "%s:%s" % (host_enc, self.port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            if not skip_accept_encoding:
                self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, *values):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Several values are sent as one header with continuation lines.
        Non-string values (e.g. an integer length) are converted with
        str().  Raises CannotSendHeader outside the Request-started state.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        parts = []
        for value in values:
            # Strings (byte or unicode) pass through untouched.
            if not isinstance(value, basestring):
                value = str(value)
            parts.append(value)
        header_line = '%s: %s' % (header, '\r\n\t'.join(parts))
        self._output(header_line)

    def endheaders(self, message_body=None):
        """Indicate that the last header line has been sent to the server.

        This method sends the request to the server.  The optional
        message_body argument can be used to pass message body
        associated with the request.  The message body will be sent in
        the same packet as the message headers if possible.  The
        message_body should be a string.
        """
        if self.__state == _CS_REQ_STARTED:
            self.__state = _CS_REQ_SENT
        else:
            raise CannotSendHeader()
        self._send_output(message_body)

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        On a broken pipe the request is retried once, provided auto_open
        allows reconnecting.
        """
        # NOTE: the shared default `headers` dict is only read, never
        # mutated, by this class.
        try:
            self._send_request(method, url, body, headers)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if not v.args or v.args[0] != 32 or not self.auto_open:
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _set_content_length(self, body):
        """Emit a Content-Length header for `body` when its size is known."""
        # Set the content-length based on the body.
        thelen = None
        try:
            thelen = str(len(body))
        except TypeError:
            # If this is a file-like object, try to
            # fstat its file descriptor
            import os
            try:
                thelen = str(os.fstat(body.fileno()).st_size)
            except (AttributeError, OSError):
                # Don't send a length if this failed
                if self.debuglevel > 0:
                    print("Cannot stat!!")

        if thelen is not None:
            self.putheader('Content-Length', thelen)

    def _send_request(self, method, url, body, headers):
        """Emit request line, headers and body for request()."""
        # honour explicitly requested Host: and Accept-Encoding headers
        header_names = dict.fromkeys([k.lower() for k in headers])
        skips = {}
        if 'host' in header_names:
            skips['skip_host'] = 1
        if 'accept-encoding' in header_names:
            skips['skip_accept_encoding'] = 1

        self.putrequest(method, url, **skips)

        # `is not None` rather than truthiness: an explicitly empty body
        # must still be announced with "Content-Length: 0".
        if body is not None and ('content-length' not in header_names):
            self._set_content_length(body)
        for hdr, value in headers.iteritems():
            self.putheader(hdr, value)
        self.endheaders(body)

    def getresponse(self, buffering=False):
        """Get the response from the server.

        Raises ResponseNotReady unless a request has been fully sent and
        any previous response on this connection has been completed.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        args = (self.sock,)
        kwds = {"strict":self.strict, "method":self._method}
        if self.debuglevel > 0:
            args += (self.debuglevel,)
        if buffering:
            #only add this keyword if non-default, for compatibility with
            #other response_classes.
            kwds["buffering"] = True
        response = self.response_class(*args, **kwds)

        try:
            response.begin()
        except BaseException:
            # Parsing failed: release the response's file object instead of
            # leaking it, then let the caller see the original error.
            response.close()
            raise
        assert response.will_close != _UNKNOWN
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps an instance of ``_connection_class`` and re-exposes the old
    interface: the request-building methods are delegated straight to the
    wrapped connection, while getreply()/getfile() translate the response
    object into the (status, reason, headers) tuple and file object the
    old API handed out.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        "Attach the connection object and wire up the delegated methods."
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.putheader = conn.putheader
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # make the wrapped connection speak this class's protocol version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the connection raises BadStatusLine, the tuple is
        (-1, <offending line>, None) instead, self.headers is None and
        getfile() returns a file made from the connection's socket (or
        None when the connection no longer has a socket).
        """
        try:
            if not buffering:
                response = self._conn.getresponse()
            else:
                # only add this keyword if non-default for compatibility
                # with other connection classes
                response = self._conn.getresponse(buffering)
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it.  Take the
            # file *before* closing, and only publish it on self.file
            # *after* closing: close() resets self.file to None, which
            # previously threw away the file we had just created.
            sock = self._conn.sock
            if sock is not None:
                fp = sock.makefile('rb', 0)
            else:
                fp = None

            # close our socket -- we want to restart after any protocol error
            self.close()

            self.file = fp
            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the wrapped connection and drop our reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
try:
    import ssl
except ImportError:
    # No ssl support available: the HTTPS classes are simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        """HTTPConnection variant whose socket is wrapped with SSL.

        key_file and cert_file are stored on the instance and handed to
        ssl.wrap_socket() when connecting.
        """

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None):
            HTTPConnection.__init__(self, host, port, strict, timeout,
                                    source_address)
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            """Open the TCP connection and wrap it for SSL.

            When a tunnel host is configured, the tunnel is set up over
            the plain socket first; the SSL wrapping is applied afterwards.
            """
            address = (self.host, self.port)
            plain_sock = socket.create_connection(address, self.timeout,
                                                  self.source_address)
            if self._tunnel_host:
                # _tunnel() talks through self.sock, so expose the plain
                # socket for the duration of the tunnel setup
                self.sock = plain_sock
                self._tunnel()
            # NOTE(review): only the key and cert files are passed here;
            # no CA bundle or cert_reqs is given -- confirm whether server
            # certificate validation is expected by callers.
            self.sock = ssl.wrap_socket(plain_sock, self.key_file,
                                        self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input.
            if port == 0:
                port = None
            connection = self._connection_class(host, port, key_file,
                                                cert_file, strict)
            self._setup(connection)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file


    def FakeSocket (sock, sslobj):
        "Deprecated shim: warn, then hand back sslobj untouched."
        warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " +
                      "Use the result of ssl.wrap_socket() directly instead.",
                      DeprecationWarning, stacklevel=2)
        return sslobj
class HTTPException(Exception):
    """Base class of every exception defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.


class NotConnected(HTTPException):
    """HTTPException subclass; carries no extra state."""


class InvalidURL(HTTPException):
    """HTTPException subclass; carries no extra state."""


class UnknownProtocol(HTTPException):
    """HTTPException subclass that records the offending version."""

    def __init__(self, version):
        self.version = version
        self.args = (version,)


class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; carries no extra state."""


class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; carries no extra state."""


class IncompleteRead(HTTPException):
    """HTTPException subclass holding the data read so far.

    partial is the data that was read; expected, when not None, is the
    number of further bytes that were still expected.
    """

    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        self.args = (partial,)

    def __repr__(self):
        if self.expected is None:
            tail = ''
        else:
            tail = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), tail)

    def __str__(self):
        # str() and repr() deliberately produce the same text
        return self.__repr__()


class ImproperConnectionState(HTTPException):
    """Common base of the connection-state exceptions below."""


class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass; carries no extra state."""


class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass; carries no extra state."""


class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass; carries no extra state."""


class BadStatusLine(HTTPException):
    """HTTPException subclass that records the offending status line.

    A false value (such as an empty string) is stored as its repr() so
    that the message is never blank.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.line = shown
        self.args = (shown,)


# for backwards compatibility
error = HTTPException
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The wrapper is built from one already-read line plus the file it came
    from.  Reads are served from the buffered line first; once the line
    has been handed out completely, read(), readline() and readlines()
    are rebound to the underlying file's own methods.  Any other
    attribute is looked up on the underlying file.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # Only called for names not found normally.  If _file itself is
        # missing (instance not initialised yet), fail cleanly instead of
        # recursing through self._file forever.
        if attr == '_file':
            raise AttributeError(attr)
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def _exhausted(self):
        # True when nothing of the buffered line is left to hand out.
        # An empty buffered line counts as already consumed, so that no
        # spurious '' (which callers read as end-of-file) is returned.
        if not self._line_consumed and not self._line_left:
            self._done()
        return self._line_consumed

    def read(self, amt=None):
        """Read up to amt characters; None or a negative amt reads all."""
        if amt is not None and amt < 0:
            # file objects treat a negative size as "read everything"
            amt = None
        if self._exhausted():
            # reached through a reference to this method taken before
            # _done() rebound self.read; never pass None down as a size
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        if amt is None or amt > self._line_left:
            # the request spans the rest of the line and part of the file
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            return s + self._file.read(amt - len(s))
        # the request is satisfied entirely from the buffered line
        i = self._line_offset
        j = i + amt
        s = self._line[i:j]
        self._line_offset = j
        self._line_left -= amt
        if self._line_left == 0:
            self._done()
        return s

    def readline(self):
        """Return the rest of the buffered line, or the file's next line."""
        if self._exhausted():
            return self._file.readline()
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the buffered line plus the file's lines."""
        if self._exhausted():
            if size is None:
                return self._file.readlines()
            return self._file.readlines(size)
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        return L + self._file.readlines(size)
1319 def test():
1320 """Test this module.
1322 A hodge podge of tests collected here, because they have too many
1323 external dependencies for the regular test suite.
1326 import sys
1327 import getopt
1328 opts, args = getopt.getopt(sys.argv[1:], 'd')
1329 dl = 0
1330 for o, a in opts:
1331 if o == '-d': dl = dl + 1
1332 host = 'www.python.org'
1333 selector = '/'
1334 if args[0:]: host = args[0]
1335 if args[1:]: selector = args[1]
1336 h = HTTP()
1337 h.set_debuglevel(dl)
1338 h.connect(host)
1339 h.putrequest('GET', selector)
1340 h.endheaders()
1341 status, reason, headers = h.getreply()
1342 print 'status =', status
1343 print 'reason =', reason
1344 print "read", len(h.getfile().read())
1345 print
1346 if headers:
1347 for header in headers.headers: print header.strip()
1348 print
1350 # minimal test that code to extract host from url works
1351 class HTTP11(HTTP):
1352 _http_vsn = 11
1353 _http_vsn_str = 'HTTP/1.1'
1355 h = HTTP11('www.python.org')
1356 h.putrequest('GET', 'http://www.python.org/~jeremy/')
1357 h.endheaders()
1358 h.getreply()
1359 h.close()
1361 try:
1362 import ssl
1363 except ImportError:
1364 pass
1365 else:
1367 for host, selector in (('sourceforge.net', '/projects/python'),
1369 print "https://%s%s" % (host, selector)
1370 hs = HTTPS()
1371 hs.set_debuglevel(dl)
1372 hs.connect(host)
1373 hs.putrequest('GET', selector)
1374 hs.endheaders()
1375 status, reason, headers = hs.getreply()
1376 print 'status =', status
1377 print 'reason =', reason
1378 print "read", len(hs.getfile().read())
1379 print
1380 if headers:
1381 for header in headers.headers: print header.strip()
1382 print
1384 if __name__ == '__main__':
1385 test()