1 #! /usr/local/bin/python
3 # NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4 # intentionally NOT "/usr/bin/env python". On many systems
5 # (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6 # scripts, and /usr/local/bin is the default directory where Python is
7 # installed, so /usr/bin/env would be unable to find python. Granted,
8 # binary installations by Linux vendors often install Python in
# /usr/bin.  So let those vendors patch cgi.py to match their choice of installation.
12 """Support module for CGI (Common Gateway Interface) scripts.
14 This module defines a number of utilities for use by CGI scripts
18 # XXX Perhaps there should be a slimmed version that doesn't contain
19 # all those backwards compatible and debugging classes and functions?
24 # Michael McLay started this module. Steve Majewski changed the
25 # interface to SvFormContentDict and FormContentDict. The multipart
26 # parsing was inspired by code submitted by Andreas Paepcke. Guido van
27 # Rossum rewrote, reformatted and documented the module and is currently
28 # responsible for its maintenance.
43 from StringIO
import StringIO
45 __all__
= ["MiniFieldStorage", "FieldStorage", "FormContentDict",
46 "SvFormContentDict", "InterpFormContentDict", "FormContent",
47 "parse", "parse_qs", "parse_qsl", "parse_multipart",
48 "parse_header", "print_exception", "print_environ",
49 "print_form", "print_directory", "print_arguments",
50 "print_environ_usage", "escape"]
55 logfile
= "" # Filename to log to, if not empty
56 logfp
= None # File object to log to, if not None
58 def initlog(*allargs
):
59 """Write a log message, if there is a log file.
61 Even though this function is called initlog(), you should always
62 use log(); log is a variable that is set either to initlog
63 (initially), to dolog (once the log file has been opened), or to
64 nolog (when logging is disabled).
66 The first argument is a format string; the remaining arguments (if
67 any) are arguments to the % operator, so e.g.
68 log("%s: %s", "a", "b")
69 will write "a: b" to the log file, followed by a newline.
71 If the global logfp is not None, it should be a file object to
72 which log data is written.
74 If the global logfp is None, the global logfile may be a string
75 giving a filename to open, in append mode. This file should be
76 world writable!!! If the file can't be opened, logging is
77 silently disabled (since there is no safe place where we could
78 send an error message).
82 if logfile
and not logfp
:
84 logfp
= open(logfile
, "a")
def dolog(fmt, *args):
    """Write a log message to the log file.  See initlog() for docs.

    fmt is a %-style format string and args are the values substituted
    into it; the formatted text plus a trailing newline is written to
    the module-level file object logfp.
    """
    # logfp is read as a module global; this function never rebinds it.
    logfp.write(fmt % args + "\n")
98 """Dummy function, assigned to log when logging is disabled."""
101 log
= initlog
# The current logging function
107 # Maximum input we will accept when REQUEST_METHOD is POST
108 # 0 ==> unlimited input
111 def parse(fp
=None, environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
112 """Parse a query in the environment or from a file (default stdin)
114 Arguments, all optional:
116 fp : file pointer; default: sys.stdin
118 environ : environment dictionary; default: os.environ
120 keep_blank_values: flag indicating whether blank values in
121 URL encoded forms should be treated as blank strings.
122 A true value indicates that blanks should be retained as
123 blank strings. The default false value indicates that
124 blank values are to be ignored and treated as if they were
127 strict_parsing: flag indicating what to do with parsing errors.
128 If false (the default), errors are silently ignored.
129 If true, errors raise a ValueError exception.
133 if not 'REQUEST_METHOD' in environ
:
134 environ
['REQUEST_METHOD'] = 'GET' # For testing stand-alone
135 if environ
['REQUEST_METHOD'] == 'POST':
136 ctype
, pdict
= parse_header(environ
['CONTENT_TYPE'])
137 if ctype
== 'multipart/form-data':
138 return parse_multipart(fp
, pdict
)
139 elif ctype
== 'application/x-www-form-urlencoded':
140 clength
= int(environ
['CONTENT_LENGTH'])
141 if maxlen
and clength
> maxlen
:
142 raise ValueError, 'Maximum content length exceeded'
143 qs
= fp
.read(clength
)
145 qs
= '' # Unknown content-type
146 if 'QUERY_STRING' in environ
:
148 qs
= qs
+ environ
['QUERY_STRING']
151 qs
= qs
+ sys
.argv
[1]
152 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
153 elif 'QUERY_STRING' in environ
:
154 qs
= environ
['QUERY_STRING']
160 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
161 return parse_qs(qs
, keep_blank_values
, strict_parsing
)
164 def parse_qs(qs
, keep_blank_values
=0, strict_parsing
=0):
165 """Parse a query given as a string argument.
169 qs: URL-encoded query string to be parsed
171 keep_blank_values: flag indicating whether blank values in
172 URL encoded queries should be treated as blank strings.
173 A true value indicates that blanks should be retained as
174 blank strings. The default false value indicates that
175 blank values are to be ignored and treated as if they were
178 strict_parsing: flag indicating what to do with parsing errors.
179 If false (the default), errors are silently ignored.
180 If true, errors raise a ValueError exception.
183 for name
, value
in parse_qsl(qs
, keep_blank_values
, strict_parsing
):
185 dict[name
].append(value
)
190 def parse_qsl(qs
, keep_blank_values
=0, strict_parsing
=0):
191 """Parse a query given as a string argument.
195 qs: URL-encoded query string to be parsed
197 keep_blank_values: flag indicating whether blank values in
198 URL encoded queries should be treated as blank strings. A
199 true value indicates that blanks should be retained as blank
200 strings. The default false value indicates that blank values
201 are to be ignored and treated as if they were not included.
203 strict_parsing: flag indicating what to do with parsing errors. If
204 false (the default), errors are silently ignored. If true,
205 errors raise a ValueError exception.
207 Returns a list, as G-d intended.
209 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
211 for name_value
in pairs
:
212 nv
= name_value
.split('=', 1)
215 raise ValueError, "bad query field: %s" % `name_value`
217 if len(nv
[1]) or keep_blank_values
:
218 name
= urllib
.unquote(nv
[0].replace('+', ' '))
219 value
= urllib
.unquote(nv
[1].replace('+', ' '))
220 r
.append((name
, value
))
225 def parse_multipart(fp
, pdict
):
226 """Parse multipart input.
    pdict: dictionary containing other parameters of content-type header
232 Returns a dictionary just like parse_qs(): keys are the field names, each
233 value is a list of values for that field. This is easy to use but not
234 much good if you are expecting megabytes to be uploaded -- in that case,
235 use the FieldStorage class instead which is much more flexible. Note
236 that content-type is the raw, unparsed contents of the content-type
239 XXX This does not parse nested multipart parts -- use FieldStorage for
242 XXX This should really be subsumed by FieldStorage altogether -- no
243 point in having two implementations of the same parsing algorithm.
247 if 'boundary' in pdict
:
248 boundary
= pdict
['boundary']
249 if not valid_boundary(boundary
):
250 raise ValueError, ('Invalid boundary in multipart form: %s'
253 nextpart
= "--" + boundary
254 lastpart
= "--" + boundary
+ "--"
258 while terminator
!= lastpart
:
262 # At start of next part. Read headers first.
263 headers
= mimetools
.Message(fp
)
264 clength
= headers
.getheader('content-length')
271 if maxlen
and bytes
> maxlen
:
272 raise ValueError, 'Maximum content length exceeded'
273 data
= fp
.read(bytes
)
276 # Read lines until end of part.
281 terminator
= lastpart
# End outer loop
284 terminator
= line
.strip()
285 if terminator
in (nextpart
, lastpart
):
293 # Strip final line terminator
295 if line
[-2:] == "\r\n":
297 elif line
[-1:] == "\n":
300 data
= "".join(lines
)
301 line
= headers
['content-disposition']
304 key
, params
= parse_header(line
)
305 if key
!= 'form-data':
308 name
= params
['name']
312 partdict
[name
].append(data
)
314 partdict
[name
] = [data
]
319 def parse_header(line
):
320 """Parse a Content-type like header.
322 Return the main content-type and a dictionary of options.
325 plist
= map(lambda x
: x
.strip(), line
.split(';'))
326 key
= plist
.pop(0).lower()
331 name
= p
[:i
].strip().lower()
332 value
= p
[i
+1:].strip()
333 if len(value
) >= 2 and value
[0] == value
[-1] == '"':
339 # Classes for field storage
340 # =========================
342 class MiniFieldStorage
:
344 """Like FieldStorage, for use when no file uploads are possible."""
353 disposition_options
= {}
356 def __init__(self
, name
, value
):
357 """Constructor from field name and value."""
360 # self.file = StringIO(value)
363 """Return printable representation."""
364 return "MiniFieldStorage(%s, %s)" % (`self
.name`
, `self
.value`
)
369 """Store a sequence of fields, reading multipart/form-data.
371 This class provides naming, typing, files stored on disk, and
372 more. At the top level, it is accessible like a dictionary, whose
373 keys are the field names. (Note: None can occur as a field name.)
374 The items are either a Python list (if there's multiple values) or
375 another FieldStorage or MiniFieldStorage object. If it's a single
376 object, it has the following attributes:
378 name: the field name, if specified; otherwise None
380 filename: the filename, if specified; otherwise None; this is the
381 client side filename, *not* the file name on which it is
382 stored (that's a temporary file you don't deal with)
384 value: the value as a *string*; for file uploads, this
385 transparently reads the file every time you request the value
387 file: the file(-like) object from which you can read the data;
        None if the data is stored as a simple string
390 type: the content-type, or None if not specified
392 type_options: dictionary of options specified on the content-type
395 disposition: content-disposition, or None if not specified
397 disposition_options: dictionary of corresponding options
399 headers: a dictionary(-like) object (sometimes rfc822.Message or a
400 subclass thereof) containing *all* headers
402 The class is subclassable, mostly for the purpose of overriding
403 the make_file() method, which is called internally to come up with
404 a file open for reading and writing. This makes it possible to
405 override the default choice of storing all files in a temporary
406 directory and unlinking them as soon as they have been opened.
410 def __init__(self
, fp
=None, headers
=None, outerboundary
="",
411 environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
412 """Constructor. Read multipart/* until last part.
414 Arguments, all optional:
416 fp : file pointer; default: sys.stdin
417 (not used when the request method is GET)
419 headers : header dictionary-like object; default:
420 taken from environ as per CGI spec
422 outerboundary : terminating multipart boundary
423 (for internal use only)
425 environ : environment dictionary; default: os.environ
427 keep_blank_values: flag indicating whether blank values in
428 URL encoded forms should be treated as blank strings.
429 A true value indicates that blanks should be retained as
430 blank strings. The default false value indicates that
431 blank values are to be ignored and treated as if they were
434 strict_parsing: flag indicating what to do with parsing errors.
435 If false (the default), errors are silently ignored.
436 If true, errors raise a ValueError exception.
440 self
.keep_blank_values
= keep_blank_values
441 self
.strict_parsing
= strict_parsing
442 if 'REQUEST_METHOD' in environ
:
443 method
= environ
['REQUEST_METHOD'].upper()
444 if method
== 'GET' or method
== 'HEAD':
445 if 'QUERY_STRING' in environ
:
446 qs
= environ
['QUERY_STRING']
453 headers
= {'content-type':
454 "application/x-www-form-urlencoded"}
458 # Set default content-type for POST to what's traditional
459 headers
['content-type'] = "application/x-www-form-urlencoded"
460 if 'CONTENT_TYPE' in environ
:
461 headers
['content-type'] = environ
['CONTENT_TYPE']
462 if 'CONTENT_LENGTH' in environ
:
463 headers
['content-length'] = environ
['CONTENT_LENGTH']
464 self
.fp
= fp
or sys
.stdin
465 self
.headers
= headers
466 self
.outerboundary
= outerboundary
468 # Process content-disposition header
469 cdisp
, pdict
= "", {}
470 if 'content-disposition' in self
.headers
:
471 cdisp
, pdict
= parse_header(self
.headers
['content-disposition'])
472 self
.disposition
= cdisp
473 self
.disposition_options
= pdict
476 self
.name
= pdict
['name']
478 if 'filename' in pdict
:
479 self
.filename
= pdict
['filename']
481 # Process content-type header
483 # Honor any existing content-type header. But if there is no
484 # content-type header, use some sensible defaults. Assume
485 # outerboundary is "" at the outer level, but something non-false
486 # inside a multi-part. The default for an inner part is text/plain,
487 # but for an outer part it should be urlencoded. This should catch
488 # bogus clients which erroneously forget to include a content-type
491 # See below for what we do if there does exist a content-type header,
492 # but it happens to be something we don't understand.
493 if 'content-type' in self
.headers
:
494 ctype
, pdict
= parse_header(self
.headers
['content-type'])
495 elif self
.outerboundary
or method
!= 'POST':
496 ctype
, pdict
= "text/plain", {}
498 ctype
, pdict
= 'application/x-www-form-urlencoded', {}
500 self
.type_options
= pdict
501 self
.innerboundary
= ""
502 if 'boundary' in pdict
:
503 self
.innerboundary
= pdict
['boundary']
505 if 'content-length' in self
.headers
:
507 clen
= int(self
.headers
['content-length'])
510 if maxlen
and clen
> maxlen
:
511 raise ValueError, 'Maximum content length exceeded'
514 self
.list = self
.file = None
516 if ctype
== 'application/x-www-form-urlencoded':
517 self
.read_urlencoded()
518 elif ctype
[:10] == 'multipart/':
519 self
.read_multi(environ
, keep_blank_values
, strict_parsing
)
524 """Return a printable representation."""
525 return "FieldStorage(%s, %s, %s)" % (
526 `self
.name`
, `self
.filename`
, `self
.value`
)
529 return iter(self
.keys())
531 def __getattr__(self
, name
):
533 raise AttributeError, name
536 value
= self
.file.read()
538 elif self
.list is not None:
544 def __getitem__(self
, key
):
545 """Dictionary style indexing."""
546 if self
.list is None:
547 raise TypeError, "not indexable"
549 for item
in self
.list:
550 if item
.name
== key
: found
.append(item
)
558 def getvalue(self
, key
, default
=None):
559 """Dictionary style get() method, including 'value' lookup."""
562 if type(value
) is type([]):
563 return map(lambda v
: v
.value
, value
)
569 def getfirst(self
, key
, default
=None):
570 """ Return the first value received."""
573 if type(value
) is type([]):
574 return value
[0].value
580 def getlist(self
, key
):
581 """ Return list of received values."""
584 if type(value
) is type([]):
585 return map(lambda v
: v
.value
, value
)
592 """Dictionary style keys() method."""
593 if self
.list is None:
594 raise TypeError, "not indexable"
596 for item
in self
.list:
597 if item
.name
not in keys
: keys
.append(item
.name
)
def has_key(self, key):
    """Dictionary style has_key() method.

    Return True if at least one item in self.list has a name equal to
    key, otherwise False.  Raises TypeError("not indexable") when
    self.list is None.
    """
    if self.list is None:
        raise TypeError("not indexable")
    for item in self.list:
        if item.name == key:
            return True
    # Always hand back a real boolean rather than falling off the end.
    return False
def __contains__(self, key):
    """Dictionary style __contains__ method.

    Return True if at least one item in self.list has a name equal to
    key, otherwise False.  Raises TypeError("not indexable") when
    self.list is None.
    """
    if self.list is None:
        raise TypeError("not indexable")
    for item in self.list:
        if item.name == key:
            return True
    # Always hand back a real boolean rather than falling off the end.
    return False
617 """Dictionary style len(x) support."""
618 return len(self
.keys())
620 def read_urlencoded(self
):
621 """Internal: read data in query string format."""
622 qs
= self
.fp
.read(self
.length
)
623 self
.list = list = []
624 for key
, value
in parse_qsl(qs
, self
.keep_blank_values
,
625 self
.strict_parsing
):
626 list.append(MiniFieldStorage(key
, value
))
629 FieldStorageClass
= None
631 def read_multi(self
, environ
, keep_blank_values
, strict_parsing
):
632 """Internal: read a part that is itself multipart."""
633 ib
= self
.innerboundary
634 if not valid_boundary(ib
):
635 raise ValueError, ('Invalid boundary in multipart form: %s'
638 klass
= self
.FieldStorageClass
or self
.__class
__
639 part
= klass(self
.fp
, {}, ib
,
640 environ
, keep_blank_values
, strict_parsing
)
641 # Throw first part away
643 headers
= rfc822
.Message(self
.fp
)
644 part
= klass(self
.fp
, headers
, ib
,
645 environ
, keep_blank_values
, strict_parsing
)
646 self
.list.append(part
)
649 def read_single(self
):
650 """Internal: read an atomic part."""
658 bufsize
= 8*1024 # I/O buffering size for copy to file
660 def read_binary(self
):
661 """Internal: read binary data."""
662 self
.file = self
.make_file('b')
666 data
= self
.fp
.read(min(todo
, self
.bufsize
))
670 self
.file.write(data
)
671 todo
= todo
- len(data
)
673 def read_lines(self
):
674 """Internal: read lines until EOF or outerboundary."""
675 self
.file = self
.__file
= StringIO()
676 if self
.outerboundary
:
677 self
.read_lines_to_outerboundary()
679 self
.read_lines_to_eof()
681 def __write(self
, line
):
682 if self
.__file
is not None:
683 if self
.__file
.tell() + len(line
) > 1000:
684 self
.file = self
.make_file('')
685 self
.file.write(self
.__file
.getvalue())
687 self
.file.write(line
)
689 def read_lines_to_eof(self
):
690 """Internal: read lines until EOF."""
692 line
= self
.fp
.readline()
698 def read_lines_to_outerboundary(self
):
699 """Internal: read lines until outerboundary."""
700 next
= "--" + self
.outerboundary
704 line
= self
.fp
.readline()
709 strippedline
= line
.strip()
710 if strippedline
== next
:
712 if strippedline
== last
:
716 if line
[-2:] == "\r\n":
719 elif line
[-1] == "\n":
724 self
.__write
(odelim
+ line
)
726 def skip_lines(self
):
727 """Internal: skip lines until outer boundary if defined."""
728 if not self
.outerboundary
or self
.done
:
730 next
= "--" + self
.outerboundary
733 line
= self
.fp
.readline()
738 strippedline
= line
.strip()
739 if strippedline
== next
:
741 if strippedline
== last
:
def make_file(self, binary=None):
    """Overridable: return a readable & writable file.

    The file will be used as follows:
    - data is written to it
    - data is read from it

    The 'binary' argument is unused -- the file is always opened
    in binary mode ("w+b").

    This version opens a temporary file for reading and writing,
    and immediately deletes (unlinks) it.  The trick (on Unix!) is
    that the file can still be used, but it can't be opened by
    another process, and it will automatically be deleted when it
    is closed or when the current process terminates.

    If you want a more permanent file, you derive a class which
    overrides this method.  If you want a visible temporary file
    that is nevertheless automatically deleted when the script
    terminates, try defining a __del__ method in a derived class
    which unlinks the temporary files you have created.
    """
    # Imported locally so the method does not rely on a module-level import.
    import tempfile
    return tempfile.TemporaryFile("w+b")
774 # Backwards Compatibility Classes
775 # ===============================
class FormContentDict(UserDict.UserDict):
    """Form content as dictionary with a list of values per field.

    form = FormContentDict()

    form[key] -> [value, value, ...]
    key in form -> Boolean
    form.keys() -> [key, key, ...]
    form.values() -> [[val, val, ...], [val, val, ...], ...]
    form.items() ->  [(key, [val, val, ...]), (key, [val, val, ...]), ...]
    form.dict == {key: [val, val, ...], ...}

    """

    def __init__(self, environ=os.environ):
        """Parse the request described by environ and store the result.

        The mapping returned by parse(environ=environ) is bound to both
        self.dict and self.data, so the two names refer to the same
        object.  The raw query string is then read back from
        environ['QUERY_STRING'] and kept as self.query_string.
        """
        parsed = parse(environ=environ)
        self.dict = self.data = parsed
        # Read after parse() has run; a missing key raises KeyError here.
        self.query_string = environ['QUERY_STRING']
795 class SvFormContentDict(FormContentDict
):
796 """Form content as dictionary expecting a single value per field.
798 If you only expect a single value for each field, then form[key]
799 will return that single value. It will raise an IndexError if
800 that expectation is not true. If you expect a field to have
    possible multiple values, then you can use form.getlist(key) to
802 get all of the values. values() and items() are a compromise:
803 they return single strings where there is a single value, and
804 lists of strings otherwise.
def __getitem__(self, key):
    """Return the single value stored for key.

    Raises IndexError('expecting a single value') when more than one
    value is stored for the field.  A key that is absent from
    self.dict propagates the KeyError raised by the lookup.
    """
    values = self.dict[key]
    if len(values) > 1:
        raise IndexError('expecting a single value')
    return values[0]
def getlist(self, key):
    """Return the complete list of values stored for key.

    The list held in self.dict is returned as-is (not copied); a key
    that is absent propagates the KeyError raised by the lookup.
    """
    return self.dict[key]
815 for value
in self
.dict.values():
817 result
.append(value
[0])
818 else: result
.append(value
)
822 for key
, value
in self
.dict.items():
824 result
.append((key
, value
[0]))
825 else: result
.append((key
, value
))
829 class InterpFormContentDict(SvFormContentDict
):
830 """This class is present for backwards compatibility only."""
831 def __getitem__(self
, key
):
832 v
= SvFormContentDict
.__getitem
__(self
, key
)
833 if v
[0] in '0123456789+-.':
837 except ValueError: pass
841 for key
in self
.keys():
843 result
.append(self
[key
])
845 result
.append(self
.dict[key
])
849 for key
in self
.keys():
851 result
.append((key
, self
[key
]))
853 result
.append((key
, self
.dict[key
]))
857 class FormContent(FormContentDict
):
858 """This class is present for backwards compatibility only."""
def values(self, key):
    """Return the list of values stored for key, or None if key is absent."""
    if key in self.dict:
        return self.dict[key]
    # Unknown field: make the None result explicit.
    return None
862 def indexed_value(self
, key
, location
):
864 if len(self
.dict[key
]) > location
:
865 return self
.dict[key
][location
]
def value(self, key):
    """Return the first value stored for key, or None if key is absent."""
    if key in self.dict:
        return self.dict[key][0]
    # Unknown field: make the None result explicit.
    return None
def length(self, key):
    """Return how many values are stored for key (KeyError if key is absent)."""
    return len(self.dict[key])
def stripped(self, key):
    """Return the first value for key with surrounding whitespace removed.

    Returns None if key is absent from self.dict.
    """
    if key in self.dict:
        return self.dict[key][0].strip()
    # Unknown field: make the None result explicit.
    return None
883 def test(environ
=os
.environ
):
884 """Robust test CGI script, usable as main program.
886 Write minimal HTTP headers and dump all information provided to
887 the script in HTML form.
890 print "Content-type: text/html"
892 sys
.stderr
= sys
.stdout
894 form
= FieldStorage() # Replace with other classes to test those
898 print_environ(environ
)
899 print_environ_usage()
901 exec "testing print_exception() -- <I>italics?</I>"
904 print "<H3>What follows is a test, not an actual exception:</H3>"
909 print "<H1>Second try with a small maxlen...</H1>"
914 form
= FieldStorage() # Replace with other classes to test those
918 print_environ(environ
)
922 def print_exception(type=None, value
=None, tb
=None, limit
=None):
924 type, value
, tb
= sys
.exc_info()
927 print "<H3>Traceback (most recent call last):</H3>"
928 list = traceback
.format_tb(tb
, limit
) + \
929 traceback
.format_exception_only(type, value
)
930 print "<PRE>%s<B>%s</B></PRE>" % (
931 escape("".join(list[:-1])),
936 def print_environ(environ
=os
.environ
):
937 """Dump the shell environment as HTML."""
938 keys
= environ
.keys()
941 print "<H3>Shell Environment:</H3>"
944 print "<DT>", escape(key
), "<DD>", escape(environ
[key
])
948 def print_form(form
):
949 """Dump the contents of a form as HTML."""
953 print "<H3>Form Contents:</H3>"
955 print "<P>No form fields."
958 print "<DT>" + escape(key
) + ":",
960 print "<i>" + escape(`
type(value
)`
) + "</i>"
961 print "<DD>" + escape(`value`
)
965 def print_directory():
966 """Dump the current directory as HTML."""
968 print "<H3>Current Working Directory:</H3>"
971 except os
.error
, msg
:
972 print "os.error:", escape(str(msg
))
977 def print_arguments():
979 print "<H3>Command Line Arguments:</H3>"
984 def print_environ_usage():
985 """Dump a list of environment variables used by CGI as HTML."""
987 <H3>These environment variables could have been set:</H3>
997 <LI>GATEWAY_INTERFACE
1015 In addition, HTTP headers sent by the server may be passed in the
1016 environment as well. Here are some common variable names:
def escape(s, quote=None):
    """Replace special characters '&', '<' and '>' by SGML entities.

    If the optional flag quote is true, the double-quote character (")
    is translated to its entity as well.  The escaped string is
    returned; s itself is not modified.
    """
    s = s.replace("&", "&amp;")  # Must be done first!
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
    """Check s against the multipart boundary pattern.

    Returns the re match object (truthy) when s matches _vb_pattern and
    None otherwise.  With the default pattern a boundary is up to 200
    characters in the range ' '..'~' followed by one final character in
    the range '!'..'~', so it cannot be empty or end with a space.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re
    return re.match(_vb_pattern, s)
1047 # Call test() when this file is run as a script (not imported as a module)
1048 if __name__
== '__main__':