1 #! /usr/local/bin/python
3 # NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4 # intentionally NOT "/usr/bin/env python". On many systems
5 # (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6 # scripts, and /usr/local/bin is the default directory where Python is
7 # installed, so /usr/bin/env would be unable to find python. Granted,
8 # binary installations by Linux vendors often install Python in
9 # /usr/bin. So let those vendors patch cgi.py to match their choice
12 """Support module for CGI (Common Gateway Interface) scripts.
14 This module defines a number of utilities for use by CGI scripts
18 # XXX Perhaps there should be a slimmed version that doesn't contain
19 # all those backwards compatible and debugging classes and functions?
24 # Michael McLay started this module. Steve Majewski changed the
25 # interface to SvFormContentDict and FormContentDict. The multipart
26 # parsing was inspired by code submitted by Andreas Paepcke. Guido van
27 # Rossum rewrote, reformatted and documented the module and is currently
28 # responsible for its maintenance.
43 from StringIO
import StringIO
45 __all__
= ["MiniFieldStorage", "FieldStorage", "FormContentDict",
46 "SvFormContentDict", "InterpFormContentDict", "FormContent",
47 "parse", "parse_qs", "parse_qsl", "parse_multipart",
48 "parse_header", "print_exception", "print_environ",
49 "print_form", "print_directory", "print_arguments",
50 "print_environ_usage", "escape"]
55 logfile
= "" # Filename to log to, if not empty
56 logfp
= None # File object to log to, if not None
58 def initlog(*allargs
):
59 """Write a log message, if there is a log file.
61 Even though this function is called initlog(), you should always
62 use log(); log is a variable that is set either to initlog
63 (initially), to dolog (once the log file has been opened), or to
64 nolog (when logging is disabled).
66 The first argument is a format string; the remaining arguments (if
67 any) are arguments to the % operator, so e.g.
68 log("%s: %s", "a", "b")
69 will write "a: b" to the log file, followed by a newline.
71 If the global logfp is not None, it should be a file object to
72 which log data is written.
74 If the global logfp is None, the global logfile may be a string
75 giving a filename to open, in append mode. This file should be
76 world writable!!! If the file can't be opened, logging is
77 silently disabled (since there is no safe place where we could
78 send an error message).
82 if logfile
and not logfp
:
84 logfp
= open(logfile
, "a")
def dolog(fmt, *args):
    """Append one formatted message to the open log file.

    fmt is a %-style format string and args supplies its values; the
    formatted text is written to the global logfp followed by a newline.
    See initlog() for the full description of the logging scheme.
    """
    message = fmt % args
    logfp.write(message + "\n")
98 """Dummy function, assigned to log when logging is disabled."""
101 log
= initlog
# The current logging function
107 # Maximum input we will accept when REQUEST_METHOD is POST
108 # 0 ==> unlimited input
111 def parse(fp
=None, environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
112 """Parse a query in the environment or from a file (default stdin)
114 Arguments, all optional:
116 fp : file pointer; default: sys.stdin
118 environ : environment dictionary; default: os.environ
120 keep_blank_values: flag indicating whether blank values in
121 URL encoded forms should be treated as blank strings.
122 A true value indicates that blanks should be retained as
123 blank strings. The default false value indicates that
124 blank values are to be ignored and treated as if they were
127 strict_parsing: flag indicating what to do with parsing errors.
128 If false (the default), errors are silently ignored.
129 If true, errors raise a ValueError exception.
133 if not environ
.has_key('REQUEST_METHOD'):
134 environ
['REQUEST_METHOD'] = 'GET' # For testing stand-alone
135 if environ
['REQUEST_METHOD'] == 'POST':
136 ctype
, pdict
= parse_header(environ
['CONTENT_TYPE'])
137 if ctype
== 'multipart/form-data':
138 return parse_multipart(fp
, pdict
)
139 elif ctype
== 'application/x-www-form-urlencoded':
140 clength
= int(environ
['CONTENT_LENGTH'])
141 if maxlen
and clength
> maxlen
:
142 raise ValueError, 'Maximum content length exceeded'
143 qs
= fp
.read(clength
)
145 qs
= '' # Unknown content-type
146 if environ
.has_key('QUERY_STRING'):
148 qs
= qs
+ environ
['QUERY_STRING']
151 qs
= qs
+ sys
.argv
[1]
152 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
153 elif environ
.has_key('QUERY_STRING'):
154 qs
= environ
['QUERY_STRING']
160 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
161 return parse_qs(qs
, keep_blank_values
, strict_parsing
)
164 def parse_qs(qs
, keep_blank_values
=0, strict_parsing
=0):
165 """Parse a query given as a string argument.
169 qs: URL-encoded query string to be parsed
171 keep_blank_values: flag indicating whether blank values in
172 URL encoded queries should be treated as blank strings.
173 A true value indicates that blanks should be retained as
174 blank strings. The default false value indicates that
175 blank values are to be ignored and treated as if they were
178 strict_parsing: flag indicating what to do with parsing errors.
179 If false (the default), errors are silently ignored.
180 If true, errors raise a ValueError exception.
183 for name
, value
in parse_qsl(qs
, keep_blank_values
, strict_parsing
):
184 if dict.has_key(name
):
185 dict[name
].append(value
)
190 def parse_qsl(qs
, keep_blank_values
=0, strict_parsing
=0):
191 """Parse a query given as a string argument.
195 qs: URL-encoded query string to be parsed
197 keep_blank_values: flag indicating whether blank values in
198 URL encoded queries should be treated as blank strings. A
199 true value indicates that blanks should be retained as blank
200 strings. The default false value indicates that blank values
201 are to be ignored and treated as if they were not included.
203 strict_parsing: flag indicating what to do with parsing errors. If
204 false (the default), errors are silently ignored. If true,
205 errors raise a ValueError exception.
207 Returns a list, as G-d intended.
209 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
211 for name_value
in pairs
:
212 nv
= name_value
.split('=', 1)
215 raise ValueError, "bad query field: %s" % `name_value`
217 if len(nv
[1]) or keep_blank_values
:
218 name
= urllib
.unquote(nv
[0].replace('+', ' '))
219 value
= urllib
.unquote(nv
[1].replace('+', ' '))
220 r
.append((name
, value
))
225 def parse_multipart(fp
, pdict
):
226 """Parse multipart input.
230 pdict: dictionary containing other parameters of content-type header
232 Returns a dictionary just like parse_qs(): keys are the field names, each
233 value is a list of values for that field. This is easy to use but not
234 much good if you are expecting megabytes to be uploaded -- in that case,
235 use the FieldStorage class instead which is much more flexible. Note
236 that content-type is the raw, unparsed contents of the content-type
239 XXX This does not parse nested multipart parts -- use FieldStorage for
242 XXX This should really be subsumed by FieldStorage altogether -- no
243 point in having two implementations of the same parsing algorithm.
247 if pdict
.has_key('boundary'):
248 boundary
= pdict
['boundary']
249 if not valid_boundary(boundary
):
250 raise ValueError, ('Invalid boundary in multipart form: %s'
253 nextpart
= "--" + boundary
254 lastpart
= "--" + boundary
+ "--"
258 while terminator
!= lastpart
:
262 # At start of next part. Read headers first.
263 headers
= mimetools
.Message(fp
)
264 clength
= headers
.getheader('content-length')
271 if maxlen
and bytes
> maxlen
:
272 raise ValueError, 'Maximum content length exceeded'
273 data
= fp
.read(bytes
)
276 # Read lines until end of part.
281 terminator
= lastpart
# End outer loop
284 terminator
= line
.strip()
285 if terminator
in (nextpart
, lastpart
):
293 # Strip final line terminator
295 if line
[-2:] == "\r\n":
297 elif line
[-1:] == "\n":
300 data
= "".join(lines
)
301 line
= headers
['content-disposition']
304 key
, params
= parse_header(line
)
305 if key
!= 'form-data':
307 if params
.has_key('name'):
308 name
= params
['name']
311 if partdict
.has_key(name
):
312 partdict
[name
].append(data
)
314 partdict
[name
] = [data
]
319 def parse_header(line
):
320 """Parse a Content-type like header.
322 Return the main content-type and a dictionary of options.
325 plist
= map(lambda x
: x
.strip(), line
.split(';'))
326 key
= plist
[0].lower()
332 name
= p
[:i
].strip().lower()
333 value
= p
[i
+1:].strip()
334 if len(value
) >= 2 and value
[0] == value
[-1] == '"':
340 # Classes for field storage
341 # =========================
343 class MiniFieldStorage
:
345 """Like FieldStorage, for use when no file uploads are possible."""
354 disposition_options
= {}
357 def __init__(self
, name
, value
):
358 """Constructor from field name and value."""
361 # self.file = StringIO(value)
364 """Return printable representation."""
365 return "MiniFieldStorage(%s, %s)" % (`self
.name`
, `self
.value`
)
370 """Store a sequence of fields, reading multipart/form-data.
372 This class provides naming, typing, files stored on disk, and
373 more. At the top level, it is accessible like a dictionary, whose
374 keys are the field names. (Note: None can occur as a field name.)
375 The items are either a Python list (if there's multiple values) or
376 another FieldStorage or MiniFieldStorage object. If it's a single
377 object, it has the following attributes:
379 name: the field name, if specified; otherwise None
381 filename: the filename, if specified; otherwise None; this is the
382 client side filename, *not* the file name on which it is
383 stored (that's a temporary file you don't deal with)
385 value: the value as a *string*; for file uploads, this
386 transparently reads the file every time you request the value
388 file: the file(-like) object from which you can read the data;
389 None if the data is stored as a simple string
391 type: the content-type, or None if not specified
393 type_options: dictionary of options specified on the content-type
396 disposition: content-disposition, or None if not specified
398 disposition_options: dictionary of corresponding options
400 headers: a dictionary(-like) object (sometimes rfc822.Message or a
401 subclass thereof) containing *all* headers
403 The class is subclassable, mostly for the purpose of overriding
404 the make_file() method, which is called internally to come up with
405 a file open for reading and writing. This makes it possible to
406 override the default choice of storing all files in a temporary
407 directory and unlinking them as soon as they have been opened.
411 def __init__(self
, fp
=None, headers
=None, outerboundary
="",
412 environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
413 """Constructor. Read multipart/* until last part.
415 Arguments, all optional:
417 fp : file pointer; default: sys.stdin
418 (not used when the request method is GET)
420 headers : header dictionary-like object; default:
421 taken from environ as per CGI spec
423 outerboundary : terminating multipart boundary
424 (for internal use only)
426 environ : environment dictionary; default: os.environ
428 keep_blank_values: flag indicating whether blank values in
429 URL encoded forms should be treated as blank strings.
430 A true value indicates that blanks should be retained as
431 blank strings. The default false value indicates that
432 blank values are to be ignored and treated as if they were
435 strict_parsing: flag indicating what to do with parsing errors.
436 If false (the default), errors are silently ignored.
437 If true, errors raise a ValueError exception.
441 self
.keep_blank_values
= keep_blank_values
442 self
.strict_parsing
= strict_parsing
443 if environ
.has_key('REQUEST_METHOD'):
444 method
= environ
['REQUEST_METHOD'].upper()
445 if method
== 'GET' or method
== 'HEAD':
446 if environ
.has_key('QUERY_STRING'):
447 qs
= environ
['QUERY_STRING']
454 headers
= {'content-type':
455 "application/x-www-form-urlencoded"}
459 # Set default content-type for POST to what's traditional
460 headers
['content-type'] = "application/x-www-form-urlencoded"
461 if environ
.has_key('CONTENT_TYPE'):
462 headers
['content-type'] = environ
['CONTENT_TYPE']
463 if environ
.has_key('CONTENT_LENGTH'):
464 headers
['content-length'] = environ
['CONTENT_LENGTH']
465 self
.fp
= fp
or sys
.stdin
466 self
.headers
= headers
467 self
.outerboundary
= outerboundary
469 # Process content-disposition header
470 cdisp
, pdict
= "", {}
471 if self
.headers
.has_key('content-disposition'):
472 cdisp
, pdict
= parse_header(self
.headers
['content-disposition'])
473 self
.disposition
= cdisp
474 self
.disposition_options
= pdict
476 if pdict
.has_key('name'):
477 self
.name
= pdict
['name']
479 if pdict
.has_key('filename'):
480 self
.filename
= pdict
['filename']
482 # Process content-type header
484 # Honor any existing content-type header. But if there is no
485 # content-type header, use some sensible defaults. Assume
486 # outerboundary is "" at the outer level, but something non-false
487 # inside a multi-part. The default for an inner part is text/plain,
488 # but for an outer part it should be urlencoded. This should catch
489 # bogus clients which erroneously forget to include a content-type
492 # See below for what we do if there does exist a content-type header,
493 # but it happens to be something we don't understand.
494 if self
.headers
.has_key('content-type'):
495 ctype
, pdict
= parse_header(self
.headers
['content-type'])
496 elif self
.outerboundary
or method
!= 'POST':
497 ctype
, pdict
= "text/plain", {}
499 ctype
, pdict
= 'application/x-www-form-urlencoded', {}
501 self
.type_options
= pdict
502 self
.innerboundary
= ""
503 if pdict
.has_key('boundary'):
504 self
.innerboundary
= pdict
['boundary']
506 if self
.headers
.has_key('content-length'):
508 clen
= int(self
.headers
['content-length'])
511 if maxlen
and clen
> maxlen
:
512 raise ValueError, 'Maximum content length exceeded'
515 self
.list = self
.file = None
517 if ctype
== 'application/x-www-form-urlencoded':
518 self
.read_urlencoded()
519 elif ctype
[:10] == 'multipart/':
520 self
.read_multi(environ
, keep_blank_values
, strict_parsing
)
525 """Return a printable representation."""
526 return "FieldStorage(%s, %s, %s)" % (
527 `self
.name`
, `self
.filename`
, `self
.value`
)
529 def __getattr__(self
, name
):
531 raise AttributeError, name
534 value
= self
.file.read()
536 elif self
.list is not None:
542 def __getitem__(self
, key
):
543 """Dictionary style indexing."""
544 if self
.list is None:
545 raise TypeError, "not indexable"
547 for item
in self
.list:
548 if item
.name
== key
: found
.append(item
)
556 def getvalue(self
, key
, default
=None):
557 """Dictionary style get() method, including 'value' lookup."""
558 if self
.has_key(key
):
560 if type(value
) is type([]):
561 return map(lambda v
: v
.value
, value
)
567 def getfirst(self
, key
, default
=None):
568 """ Return the first value received."""
569 if self
.has_key(key
):
571 if type(value
) is type([]):
572 return value
[0].value
578 def getlist(self
, key
):
579 """ Return list of received values."""
580 if self
.has_key(key
):
582 if type(value
) is type([]):
583 return map(lambda v
: v
.value
, value
)
590 """Dictionary style keys() method."""
591 if self
.list is None:
592 raise TypeError, "not indexable"
594 for item
in self
.list:
595 if item
.name
not in keys
: keys
.append(item
.name
)
598 def has_key(self
, key
):
599 """Dictionary style has_key() method."""
600 if self
.list is None:
601 raise TypeError, "not indexable"
602 for item
in self
.list:
603 if item
.name
== key
: return 1
607 """Dictionary style len(x) support."""
608 return len(self
.keys())
610 def read_urlencoded(self
):
611 """Internal: read data in query string format."""
612 qs
= self
.fp
.read(self
.length
)
613 self
.list = list = []
614 for key
, value
in parse_qsl(qs
, self
.keep_blank_values
,
615 self
.strict_parsing
):
616 list.append(MiniFieldStorage(key
, value
))
619 FieldStorageClass
= None
621 def read_multi(self
, environ
, keep_blank_values
, strict_parsing
):
622 """Internal: read a part that is itself multipart."""
623 ib
= self
.innerboundary
624 if not valid_boundary(ib
):
625 raise ValueError, ('Invalid boundary in multipart form: %s'
628 klass
= self
.FieldStorageClass
or self
.__class
__
629 part
= klass(self
.fp
, {}, ib
,
630 environ
, keep_blank_values
, strict_parsing
)
631 # Throw first part away
633 headers
= rfc822
.Message(self
.fp
)
634 part
= klass(self
.fp
, headers
, ib
,
635 environ
, keep_blank_values
, strict_parsing
)
636 self
.list.append(part
)
639 def read_single(self
):
640 """Internal: read an atomic part."""
648 bufsize
= 8*1024 # I/O buffering size for copy to file
650 def read_binary(self
):
651 """Internal: read binary data."""
652 self
.file = self
.make_file('b')
656 data
= self
.fp
.read(min(todo
, self
.bufsize
))
660 self
.file.write(data
)
661 todo
= todo
- len(data
)
663 def read_lines(self
):
664 """Internal: read lines until EOF or outerboundary."""
665 self
.file = self
.__file
= StringIO()
666 if self
.outerboundary
:
667 self
.read_lines_to_outerboundary()
669 self
.read_lines_to_eof()
671 def __write(self
, line
):
672 if self
.__file
is not None:
673 if self
.__file
.tell() + len(line
) > 1000:
674 self
.file = self
.make_file('')
675 self
.file.write(self
.__file
.getvalue())
677 self
.file.write(line
)
679 def read_lines_to_eof(self
):
680 """Internal: read lines until EOF."""
682 line
= self
.fp
.readline()
688 def read_lines_to_outerboundary(self
):
689 """Internal: read lines until outerboundary."""
690 next
= "--" + self
.outerboundary
694 line
= self
.fp
.readline()
699 strippedline
= line
.strip()
700 if strippedline
== next
:
702 if strippedline
== last
:
706 if line
[-2:] == "\r\n":
709 elif line
[-1] == "\n":
714 self
.__write
(odelim
+ line
)
716 def skip_lines(self
):
717 """Internal: skip lines until outer boundary if defined."""
718 if not self
.outerboundary
or self
.done
:
720 next
= "--" + self
.outerboundary
723 line
= self
.fp
.readline()
728 strippedline
= line
.strip()
729 if strippedline
== next
:
731 if strippedline
== last
:
735 def make_file(self
, binary
=None):
736 """Overridable: return a readable & writable file.
738 The file will be used as follows:
739 - data is written to it
741 - data is read from it
743 The 'binary' argument is unused -- the file is always opened
746 This version opens a temporary file for reading and writing,
747 and immediately deletes (unlinks) it. The trick (on Unix!) is
748 that the file can still be used, but it can't be opened by
749 another process, and it will automatically be deleted when it
750 is closed or when the current process terminates.
752 If you want a more permanent file, you derive a class which
753 overrides this method. If you want a visible temporary file
754 that is nevertheless automatically deleted when the script
755 terminates, try defining a __del__ method in a derived class
756 which unlinks the temporary files you have created.
760 return tempfile
.TemporaryFile("w+b")
764 # Backwards Compatibility Classes
765 # ===============================
class FormContentDict(UserDict.UserDict):
    """Form content as dictionary with a list of values per field.

    form = FormContentDict()

    form[key] -> [value, value, ...]
    form.has_key(key) -> Boolean
    form.keys() -> [key, key, ...]
    form.values() -> [[val, val, ...], [val, val, ...], ...]
    form.items() ->  [(key, [val, val, ...]), (key, [val, val, ...]), ...]
    form.dict == {key: [val, val, ...], ...}

    """
    def __init__(self, environ=os.environ):
        """Parse the request described by environ and remember its query string."""
        parsed = parse(environ=environ)
        # One and the same mapping is bound to both attribute names.
        self.dict = parsed
        self.data = parsed
        # Read after parse() so that any QUERY_STRING it stored in environ is seen.
        self.query_string = environ['QUERY_STRING']
785 class SvFormContentDict(FormContentDict
):
786 """Form content as dictionary expecting a single value per field.
788 If you only expect a single value for each field, then form[key]
789 will return that single value. It will raise an IndexError if
790 that expectation is not true. If you expect a field to have
791 possible multiple values, then you can use form.getlist(key) to
792 get all of the values. values() and items() are a compromise:
793 they return single strings where there is a single value, and
794 lists of strings otherwise.
797 def __getitem__(self
, key
):
798 if len(self
.dict[key
]) > 1:
799 raise IndexError, 'expecting a single value'
800 return self
.dict[key
][0]
801 def getlist(self
, key
):
802 return self
.dict[key
]
805 for value
in self
.dict.values():
807 result
.append(value
[0])
808 else: result
.append(value
)
812 for key
, value
in self
.dict.items():
814 result
.append((key
, value
[0]))
815 else: result
.append((key
, value
))
819 class InterpFormContentDict(SvFormContentDict
):
820 """This class is present for backwards compatibility only."""
821 def __getitem__(self
, key
):
822 v
= SvFormContentDict
.__getitem
__(self
, key
)
823 if v
[0] in '0123456789+-.':
827 except ValueError: pass
831 for key
in self
.keys():
833 result
.append(self
[key
])
835 result
.append(self
.dict[key
])
839 for key
in self
.keys():
841 result
.append((key
, self
[key
]))
843 result
.append((key
, self
.dict[key
]))
847 class FormContent(FormContentDict
):
848 """This class is present for backwards compatibility only."""
849 def values(self
, key
):
850 if self
.dict.has_key(key
) :return self
.dict[key
]
852 def indexed_value(self
, key
, location
):
853 if self
.dict.has_key(key
):
854 if len(self
.dict[key
]) > location
:
855 return self
.dict[key
][location
]
858 def value(self
, key
):
859 if self
.dict.has_key(key
): return self
.dict[key
][0]
861 def length(self
, key
):
862 return len(self
.dict[key
])
863 def stripped(self
, key
):
864 if self
.dict.has_key(key
): return self
.dict[key
][0].strip()
873 def test(environ
=os
.environ
):
874 """Robust test CGI script, usable as main program.
876 Write minimal HTTP headers and dump all information provided to
877 the script in HTML form.
881 print "Content-type: text/html"
883 sys
.stderr
= sys
.stdout
885 form
= FieldStorage() # Replace with other classes to test those
889 print_environ(environ
)
890 print_environ_usage()
892 exec "testing print_exception() -- <I>italics?</I>"
895 print "<H3>What follows is a test, not an actual exception:</H3>"
900 print "<H1>Second try with a small maxlen...</H1>"
905 form
= FieldStorage() # Replace with other classes to test those
909 print_environ(environ
)
913 def print_exception(type=None, value
=None, tb
=None, limit
=None):
915 type, value
, tb
= sys
.exc_info()
918 print "<H3>Traceback (most recent call last):</H3>"
919 list = traceback
.format_tb(tb
, limit
) + \
920 traceback
.format_exception_only(type, value
)
921 print "<PRE>%s<B>%s</B></PRE>" % (
922 escape("".join(list[:-1])),
927 def print_environ(environ
=os
.environ
):
928 """Dump the shell environment as HTML."""
929 keys
= environ
.keys()
932 print "<H3>Shell Environment:</H3>"
935 print "<DT>", escape(key
), "<DD>", escape(environ
[key
])
939 def print_form(form
):
940 """Dump the contents of a form as HTML."""
944 print "<H3>Form Contents:</H3>"
946 print "<P>No form fields."
949 print "<DT>" + escape(key
) + ":",
951 print "<i>" + escape(`
type(value
)`
) + "</i>"
952 print "<DD>" + escape(`value`
)
956 def print_directory():
957 """Dump the current directory as HTML."""
959 print "<H3>Current Working Directory:</H3>"
962 except os
.error
, msg
:
963 print "os.error:", escape(str(msg
))
968 def print_arguments():
970 print "<H3>Command Line Arguments:</H3>"
975 def print_environ_usage():
976 """Dump a list of environment variables used by CGI as HTML."""
978 <H3>These environment variables could have been set:</H3>
988 <LI>GATEWAY_INTERFACE
1006 In addition, HTTP headers sent by the server may be passed in the
1007 environment as well. Here are some common variable names:
def escape(s, quote=None):
    """Replace special characters '&', '<' and '>' by SGML entities.

    Arguments:

    s     : the string to escape

    quote : optional flag; if true, the double-quote character is
            replaced by its entity as well (useful for attribute values)

    Returns the escaped copy of s.
    """
    # '&' must be handled first: the entities produced below contain '&'
    # themselves and would otherwise be escaped a second time.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
    """Check a multipart boundary string against the allowed pattern.

    Returns the match object produced by matching _vb_pattern against s
    (a true value), or None when s does not match.
    """
    import re
    checker = re.compile(_vb_pattern)
    return checker.match(s)
1038 # Call test() when this file is run as a script (not imported as a module)
1039 if __name__
== '__main__':