Update version number and release date.
[python/dscho.git] / Lib / BaseHTTPServer.py
blob195d54a3be173abdddab23f2e982fb31dbb1daf9
"""HTTP server base class.

Note: the class in this module doesn't implement any HTTP request methods;
see SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).  It does, however, optionally implement HTTP/1.1
persistent connections, as of version 0.3.

Contents:

- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html
# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
# Version of this module; it is embedded in the default server_version
# string of BaseHTTPRequestHandler.
__version__ = "0.3"

# Public names of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]

import sys
import time
import socket # For getfqdn()
import mimetools
import SocketServer
import cStringIO

# Default error message: an HTML template filled in with the mapping keys
# 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""
class HTTPServer(SocketServer.TCPServer):
    """TCP server that remembers its own host name and port.

    After binding, the fully qualified host name and the port number are
    stored in the attributes server_name and server_port.
    """

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Override server_bind to store the server name."""
        SocketServer.TCPServer.server_bind(self)
        # Only the first two items are the host and port; an IPv6 socket
        # reports a longer address tuple, so slice before unpacking.
        host, port = self.socket.getsockname()[:2]
        self.server_name = socket.getfqdn(host)
        self.server_port = port
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.  self.requestline holds the request line with its
        line terminator removed, and self.close_connection is updated
        from the protocol versions and the Connection header.

        Return True for success, False for failure; on failure, an
        error is sent back (except for an empty request line, which is
        rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = "HTTP/0.9" # Default
        self.close_connection = 1
        requestline = self.raw_requestline
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400,
                                "Bad request version (%s)" % repr(version))
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400,
                                "Bad request version (%s)" % repr(version))
                return False
            # Persistent connections need both sides to speak HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                          "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: HTTP/0.9, which only knows GET.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%s)" % repr(command))
                return False
        elif not words:
            return False
        else:
            self.send_error(400,
                            "Bad request syntax (%s)" % repr(requestline))
            return False
        self.command, self.path, self.request_version = command, path, version

        # Deal with pipelining: read only up to the blank line (or EOF)
        # that ends the header section.  The lines are collected in a list
        # and joined once instead of growing a string line by line.
        header_lines = []
        while True:
            line = self.rfile.readline()
            header_lines.append(line)
            if line in ('\r\n', '\n', ''):
                break

        # Examine the headers and look for a Connection directive
        hfile = cStringIO.StringIO("".join(header_lines))
        self.headers = self.MessageClass(hfile)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        self.raw_requestline = self.rfile.readline()
        if not self.raw_requestline:
            # Nothing could be read: stop serving this connection.
            self.close_connection = 1
            return
        if not self.parse_request(): # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501,
                            "Unsupported method (%s)" % repr(self.command))
            return
        method = getattr(self, mname)
        method()

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The message
        is HTML-escaped before it is placed in the HTML body; the log
        entry and the status line receive it unchanged.

        """

        try:
            short, explain = self.responses[code]
        except KeyError:
            short, explain = '???', '???'
        if message is None:
            message = short
        self.log_error("code %d, message %s", code, message)
        # The message frequently embeds text taken from the client's
        # request line, so neutralise HTML markup before showing it.
        # "%s" formatting is used first so that any object accepted by the
        # template's %(message)s conversion is still accepted here.
        html_message = "%s" % (message,)
        html_message = html_message.replace("&", "&amp;")
        html_message = html_message.replace("<", "&lt;")
        html_message = html_message.replace(">", "&gt;")
        content = (self.error_message_format %
                   {'code': code, 'message': html_message,
                    'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", "text/html")
        self.send_header('Connection', 'close')
        self.end_headers()
        # No body for HEAD requests, 1xx, 204 and 304 responses.
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content)

    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
            # print (self.protocol_version, code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        Nothing is written for HTTP/0.9 requests.  A Connection header
        with the value 'close' or 'keep-alive' also updates
        self.close_connection.

        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(*args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self):
        """Return the current date and time formatted for a message header."""
        now = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(now)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1-12), hence the None
    # placeholder at index 0.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the host part of self.client_address to
        socket.getfqdn() and returns the result.

        """

        # Only the first two items are the host and port; IPv6 client
        # addresses carry extra fields, so slice before unpacking.
        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See http://www.w3.org/hypertext/WWW/Protocols/HTTP/HTRESP.html
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No response', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this server.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Time-out', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service temporarily overloaded',
              'The server cannot process the request due to a high load'),
        504: ('Gateway timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version not supported', 'Cannot fulfill request.'),
        }
560 def test(HandlerClass = BaseHTTPRequestHandler,
561 ServerClass = HTTPServer, protocol="HTTP/1.0"):
562 """Test the HTTP request handler class.
564 This runs an HTTP server on port 8000 (or the first command line
565 argument).
569 if sys.argv[1:]:
570 port = int(sys.argv[1])
571 else:
572 port = 8000
573 server_address = ('', port)
575 HandlerClass.protocol_version = protocol
576 httpd = ServerClass(server_address, HandlerClass)
578 sa = httpd.socket.getsockname()
579 print "Serving HTTP on", sa[0], "port", sa[1], "..."
580 httpd.serve_forever()
# Start the demonstration server only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()