1 #! /usr/local/bin/python
3 # NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4 # intentionally NOT "/usr/bin/env python". On many systems
5 # (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6 # scripts, and /usr/local/bin is the default directory where Python is
7 # installed, so /usr/bin/env would be unable to find python. Granted,
8 # binary installations by Linux vendors often install Python in
9 # /usr/bin. So let those vendors patch cgi.py to match their choice
12 """Support module for CGI (Common Gateway Interface) scripts.
14 This module defines a number of utilities for use by CGI scripts
18 # XXX Perhaps there should be a slimmed version that doesn't contain
19 # all those backwards compatible and debugging classes and functions?
24 # Michael McLay started this module. Steve Majewski changed the
25 # interface to SvFormContentDict and FormContentDict. The multipart
26 # parsing was inspired by code submitted by Andreas Paepcke. Guido van
27 # Rossum rewrote, reformatted and documented the module and is currently
28 # responsible for its maintenance.
43 from StringIO
import StringIO
# Names made available by a star-import of this module.
__all__ = ["MiniFieldStorage", "FieldStorage", "FormContentDict",
           "SvFormContentDict", "InterpFormContentDict", "FormContent",
           "parse", "parse_qs", "parse_qsl", "parse_multipart",
           "parse_header", "print_exception", "print_environ",
           "print_form", "print_directory", "print_arguments",
           "print_environ_usage", "escape"]
logfile = ""            # Filename to log to, if not empty (initlog() opens it in append mode)
logfp = None            # File object to log to, if not None (used instead of opening logfile)
58 def initlog(*allargs
):
59 """Write a log message, if there is a log file.
61 Even though this function is called initlog(), you should always
62 use log(); log is a variable that is set either to initlog
63 (initially), to dolog (once the log file has been opened), or to
64 nolog (when logging is disabled).
66 The first argument is a format string; the remaining arguments (if
67 any) are arguments to the % operator, so e.g.
68 log("%s: %s", "a", "b")
69 will write "a: b" to the log file, followed by a newline.
71 If the global logfp is not None, it should be a file object to
72 which log data is written.
74 If the global logfp is None, the global logfile may be a string
75 giving a filename to open, in append mode. This file should be
76 world writable!!! If the file can't be opened, logging is
77 silently disabled (since there is no safe place where we could
78 send an error message).
82 if logfile
and not logfp
:
84 logfp
= open(logfile
, "a")
def dolog(fmt, *args):
    """Format a message and append it, newline-terminated, to the log file.

    fmt is a %-style format string and args are its operands, as
    described in the initlog() docstring.  The text is written to the
    module-level file object logfp.
    """
    message = fmt % args
    logfp.write(message + "\n")
98 """Dummy function, assigned to log when logging is disabled."""
# The current logging function.  It starts out as initlog; according to
# initlog()'s docstring it is later set to dolog or nolog.
log = initlog
107 # Maximum input we will accept when REQUEST_METHOD is POST
108 # 0 ==> unlimited input
111 def parse(fp
=None, environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
112 """Parse a query in the environment or from a file (default stdin)
114 Arguments, all optional:
116 fp : file pointer; default: sys.stdin
118 environ : environment dictionary; default: os.environ
120 keep_blank_values: flag indicating whether blank values in
121 URL encoded forms should be treated as blank strings.
122 A true value indicates that blanks should be retained as
123 blank strings. The default false value indicates that
124 blank values are to be ignored and treated as if they were
127 strict_parsing: flag indicating what to do with parsing errors.
128 If false (the default), errors are silently ignored.
129 If true, errors raise a ValueError exception.
133 if not 'REQUEST_METHOD' in environ
:
134 environ
['REQUEST_METHOD'] = 'GET' # For testing stand-alone
135 if environ
['REQUEST_METHOD'] == 'POST':
136 ctype
, pdict
= parse_header(environ
['CONTENT_TYPE'])
137 if ctype
== 'multipart/form-data':
138 return parse_multipart(fp
, pdict
)
139 elif ctype
== 'application/x-www-form-urlencoded':
140 clength
= int(environ
['CONTENT_LENGTH'])
141 if maxlen
and clength
> maxlen
:
142 raise ValueError, 'Maximum content length exceeded'
143 qs
= fp
.read(clength
)
145 qs
= '' # Unknown content-type
146 if 'QUERY_STRING' in environ
:
148 qs
= qs
+ environ
['QUERY_STRING']
151 qs
= qs
+ sys
.argv
[1]
152 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
153 elif 'QUERY_STRING' in environ
:
154 qs
= environ
['QUERY_STRING']
160 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
161 return parse_qs(qs
, keep_blank_values
, strict_parsing
)
164 def parse_qs(qs
, keep_blank_values
=0, strict_parsing
=0):
165 """Parse a query given as a string argument.
169 qs: URL-encoded query string to be parsed
171 keep_blank_values: flag indicating whether blank values in
172 URL encoded queries should be treated as blank strings.
173 A true value indicates that blanks should be retained as
174 blank strings. The default false value indicates that
175 blank values are to be ignored and treated as if they were
178 strict_parsing: flag indicating what to do with parsing errors.
179 If false (the default), errors are silently ignored.
180 If true, errors raise a ValueError exception.
183 for name
, value
in parse_qsl(qs
, keep_blank_values
, strict_parsing
):
185 dict[name
].append(value
)
190 def parse_qsl(qs
, keep_blank_values
=0, strict_parsing
=0):
191 """Parse a query given as a string argument.
195 qs: URL-encoded query string to be parsed
197 keep_blank_values: flag indicating whether blank values in
198 URL encoded queries should be treated as blank strings. A
199 true value indicates that blanks should be retained as blank
200 strings. The default false value indicates that blank values
201 are to be ignored and treated as if they were not included.
203 strict_parsing: flag indicating what to do with parsing errors. If
204 false (the default), errors are silently ignored. If true,
205 errors raise a ValueError exception.
207 Returns a list, as G-d intended.
209 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
211 for name_value
in pairs
:
212 nv
= name_value
.split('=', 1)
215 raise ValueError, "bad query field: %r" % (name_value
,)
216 # Handle case of a control-name with no equal sign
217 if keep_blank_values
:
221 if len(nv
[1]) or keep_blank_values
:
222 name
= urllib
.unquote(nv
[0].replace('+', ' '))
223 value
= urllib
.unquote(nv
[1].replace('+', ' '))
224 r
.append((name
, value
))
229 def parse_multipart(fp
, pdict
):
230 """Parse multipart input.
234 pdict: dictionary containing other parameters of content-type header
236 Returns a dictionary just like parse_qs(): keys are the field names, each
237 value is a list of values for that field. This is easy to use but not
238 much good if you are expecting megabytes to be uploaded -- in that case,
239 use the FieldStorage class instead which is much more flexible. Note
240 that content-type is the raw, unparsed contents of the content-type
243 XXX This does not parse nested multipart parts -- use FieldStorage for
246 XXX This should really be subsumed by FieldStorage altogether -- no
247 point in having two implementations of the same parsing algorithm.
251 if 'boundary' in pdict
:
252 boundary
= pdict
['boundary']
253 if not valid_boundary(boundary
):
254 raise ValueError, ('Invalid boundary in multipart form: %r'
257 nextpart
= "--" + boundary
258 lastpart
= "--" + boundary
+ "--"
262 while terminator
!= lastpart
:
266 # At start of next part. Read headers first.
267 headers
= mimetools
.Message(fp
)
268 clength
= headers
.getheader('content-length')
275 if maxlen
and bytes
> maxlen
:
276 raise ValueError, 'Maximum content length exceeded'
277 data
= fp
.read(bytes
)
280 # Read lines until end of part.
285 terminator
= lastpart
# End outer loop
288 terminator
= line
.strip()
289 if terminator
in (nextpart
, lastpart
):
297 # Strip final line terminator
299 if line
[-2:] == "\r\n":
301 elif line
[-1:] == "\n":
304 data
= "".join(lines
)
305 line
= headers
['content-disposition']
308 key
, params
= parse_header(line
)
309 if key
!= 'form-data':
312 name
= params
['name']
316 partdict
[name
].append(data
)
318 partdict
[name
] = [data
]
323 def parse_header(line
):
324 """Parse a Content-type like header.
326 Return the main content-type and a dictionary of options.
329 plist
= map(lambda x
: x
.strip(), line
.split(';'))
330 key
= plist
.pop(0).lower()
335 name
= p
[:i
].strip().lower()
336 value
= p
[i
+1:].strip()
337 if len(value
) >= 2 and value
[0] == value
[-1] == '"':
343 # Classes for field storage
344 # =========================
346 class MiniFieldStorage
:
348 """Like FieldStorage, for use when no file uploads are possible."""
357 disposition_options
= {}
360 def __init__(self
, name
, value
):
361 """Constructor from field name and value."""
364 # self.file = StringIO(value)
367 """Return printable representation."""
368 return "MiniFieldStorage(%r, %r)" % (self
.name
, self
.value
)
373 """Store a sequence of fields, reading multipart/form-data.
375 This class provides naming, typing, files stored on disk, and
376 more. At the top level, it is accessible like a dictionary, whose
377 keys are the field names. (Note: None can occur as a field name.)
378 The items are either a Python list (if there's multiple values) or
379 another FieldStorage or MiniFieldStorage object. If it's a single
380 object, it has the following attributes:
382 name: the field name, if specified; otherwise None
384 filename: the filename, if specified; otherwise None; this is the
385 client side filename, *not* the file name on which it is
386 stored (that's a temporary file you don't deal with)
388 value: the value as a *string*; for file uploads, this
389 transparently reads the file every time you request the value
391 file: the file(-like) object from which you can read the data;
392 None if the data is stored as a simple string
394 type: the content-type, or None if not specified
396 type_options: dictionary of options specified on the content-type
399 disposition: content-disposition, or None if not specified
401 disposition_options: dictionary of corresponding options
403 headers: a dictionary(-like) object (sometimes rfc822.Message or a
404 subclass thereof) containing *all* headers
406 The class is subclassable, mostly for the purpose of overriding
407 the make_file() method, which is called internally to come up with
408 a file open for reading and writing. This makes it possible to
409 override the default choice of storing all files in a temporary
410 directory and unlinking them as soon as they have been opened.
414 def __init__(self
, fp
=None, headers
=None, outerboundary
="",
415 environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
416 """Constructor. Read multipart/* until last part.
418 Arguments, all optional:
420 fp : file pointer; default: sys.stdin
421 (not used when the request method is GET)
423 headers : header dictionary-like object; default:
424 taken from environ as per CGI spec
426 outerboundary : terminating multipart boundary
427 (for internal use only)
429 environ : environment dictionary; default: os.environ
431 keep_blank_values: flag indicating whether blank values in
432 URL encoded forms should be treated as blank strings.
433 A true value indicates that blanks should be retained as
434 blank strings. The default false value indicates that
435 blank values are to be ignored and treated as if they were
438 strict_parsing: flag indicating what to do with parsing errors.
439 If false (the default), errors are silently ignored.
440 If true, errors raise a ValueError exception.
444 self
.keep_blank_values
= keep_blank_values
445 self
.strict_parsing
= strict_parsing
446 if 'REQUEST_METHOD' in environ
:
447 method
= environ
['REQUEST_METHOD'].upper()
448 if method
== 'GET' or method
== 'HEAD':
449 if 'QUERY_STRING' in environ
:
450 qs
= environ
['QUERY_STRING']
457 headers
= {'content-type':
458 "application/x-www-form-urlencoded"}
462 # Set default content-type for POST to what's traditional
463 headers
['content-type'] = "application/x-www-form-urlencoded"
464 if 'CONTENT_TYPE' in environ
:
465 headers
['content-type'] = environ
['CONTENT_TYPE']
466 if 'CONTENT_LENGTH' in environ
:
467 headers
['content-length'] = environ
['CONTENT_LENGTH']
468 self
.fp
= fp
or sys
.stdin
469 self
.headers
= headers
470 self
.outerboundary
= outerboundary
472 # Process content-disposition header
473 cdisp
, pdict
= "", {}
474 if 'content-disposition' in self
.headers
:
475 cdisp
, pdict
= parse_header(self
.headers
['content-disposition'])
476 self
.disposition
= cdisp
477 self
.disposition_options
= pdict
480 self
.name
= pdict
['name']
482 if 'filename' in pdict
:
483 self
.filename
= pdict
['filename']
485 # Process content-type header
487 # Honor any existing content-type header. But if there is no
488 # content-type header, use some sensible defaults. Assume
489 # outerboundary is "" at the outer level, but something non-false
490 # inside a multi-part. The default for an inner part is text/plain,
491 # but for an outer part it should be urlencoded. This should catch
492 # bogus clients which erroneously forget to include a content-type
495 # See below for what we do if there does exist a content-type header,
496 # but it happens to be something we don't understand.
497 if 'content-type' in self
.headers
:
498 ctype
, pdict
= parse_header(self
.headers
['content-type'])
499 elif self
.outerboundary
or method
!= 'POST':
500 ctype
, pdict
= "text/plain", {}
502 ctype
, pdict
= 'application/x-www-form-urlencoded', {}
504 self
.type_options
= pdict
505 self
.innerboundary
= ""
506 if 'boundary' in pdict
:
507 self
.innerboundary
= pdict
['boundary']
509 if 'content-length' in self
.headers
:
511 clen
= int(self
.headers
['content-length'])
514 if maxlen
and clen
> maxlen
:
515 raise ValueError, 'Maximum content length exceeded'
518 self
.list = self
.file = None
520 if ctype
== 'application/x-www-form-urlencoded':
521 self
.read_urlencoded()
522 elif ctype
[:10] == 'multipart/':
523 self
.read_multi(environ
, keep_blank_values
, strict_parsing
)
528 """Return a printable representation."""
529 return "FieldStorage(%r, %r, %r)" % (
530 self
.name
, self
.filename
, self
.value
)
533 return iter(self
.keys())
535 def __getattr__(self
, name
):
537 raise AttributeError, name
540 value
= self
.file.read()
542 elif self
.list is not None:
548 def __getitem__(self
, key
):
549 """Dictionary style indexing."""
550 if self
.list is None:
551 raise TypeError, "not indexable"
553 for item
in self
.list:
554 if item
.name
== key
: found
.append(item
)
562 def getvalue(self
, key
, default
=None):
563 """Dictionary style get() method, including 'value' lookup."""
566 if type(value
) is type([]):
567 return map(lambda v
: v
.value
, value
)
573 def getfirst(self
, key
, default
=None):
574 """ Return the first value received."""
577 if type(value
) is type([]):
578 return value
[0].value
584 def getlist(self
, key
):
585 """ Return list of received values."""
588 if type(value
) is type([]):
589 return map(lambda v
: v
.value
, value
)
596 """Dictionary style keys() method."""
597 if self
.list is None:
598 raise TypeError, "not indexable"
600 for item
in self
.list:
601 if item
.name
not in keys
: keys
.append(item
.name
)
604 def has_key(self
, key
):
605 """Dictionary style has_key() method."""
606 if self
.list is None:
607 raise TypeError, "not indexable"
608 for item
in self
.list:
609 if item
.name
== key
: return True
612 def __contains__(self
, key
):
613 """Dictionary style __contains__ method."""
614 if self
.list is None:
615 raise TypeError, "not indexable"
616 for item
in self
.list:
617 if item
.name
== key
: return True
621 """Dictionary style len(x) support."""
622 return len(self
.keys())
624 def read_urlencoded(self
):
625 """Internal: read data in query string format."""
626 qs
= self
.fp
.read(self
.length
)
627 self
.list = list = []
628 for key
, value
in parse_qsl(qs
, self
.keep_blank_values
,
629 self
.strict_parsing
):
630 list.append(MiniFieldStorage(key
, value
))
633 FieldStorageClass
= None
635 def read_multi(self
, environ
, keep_blank_values
, strict_parsing
):
636 """Internal: read a part that is itself multipart."""
637 ib
= self
.innerboundary
638 if not valid_boundary(ib
):
639 raise ValueError, 'Invalid boundary in multipart form: %r' % (ib
,)
641 klass
= self
.FieldStorageClass
or self
.__class
__
642 part
= klass(self
.fp
, {}, ib
,
643 environ
, keep_blank_values
, strict_parsing
)
644 # Throw first part away
646 headers
= rfc822
.Message(self
.fp
)
647 part
= klass(self
.fp
, headers
, ib
,
648 environ
, keep_blank_values
, strict_parsing
)
649 self
.list.append(part
)
652 def read_single(self
):
653 """Internal: read an atomic part."""
661 bufsize
= 8*1024 # I/O buffering size for copy to file
663 def read_binary(self
):
664 """Internal: read binary data."""
665 self
.file = self
.make_file('b')
669 data
= self
.fp
.read(min(todo
, self
.bufsize
))
673 self
.file.write(data
)
674 todo
= todo
- len(data
)
676 def read_lines(self
):
677 """Internal: read lines until EOF or outerboundary."""
678 self
.file = self
.__file
= StringIO()
679 if self
.outerboundary
:
680 self
.read_lines_to_outerboundary()
682 self
.read_lines_to_eof()
684 def __write(self
, line
):
685 if self
.__file
is not None:
686 if self
.__file
.tell() + len(line
) > 1000:
687 self
.file = self
.make_file('')
688 self
.file.write(self
.__file
.getvalue())
690 self
.file.write(line
)
692 def read_lines_to_eof(self
):
693 """Internal: read lines until EOF."""
695 line
= self
.fp
.readline()
701 def read_lines_to_outerboundary(self
):
702 """Internal: read lines until outerboundary."""
703 next
= "--" + self
.outerboundary
707 line
= self
.fp
.readline()
712 strippedline
= line
.strip()
713 if strippedline
== next
:
715 if strippedline
== last
:
719 if line
[-2:] == "\r\n":
722 elif line
[-1] == "\n":
727 self
.__write
(odelim
+ line
)
729 def skip_lines(self
):
730 """Internal: skip lines until outer boundary if defined."""
731 if not self
.outerboundary
or self
.done
:
733 next
= "--" + self
.outerboundary
736 line
= self
.fp
.readline()
741 strippedline
= line
.strip()
742 if strippedline
== next
:
744 if strippedline
== last
:
748 def make_file(self
, binary
=None):
749 """Overridable: return a readable & writable file.
751 The file will be used as follows:
752 - data is written to it
754 - data is read from it
756 The 'binary' argument is unused -- the file is always opened
759 This version opens a temporary file for reading and writing,
760 and immediately deletes (unlinks) it. The trick (on Unix!) is
761 that the file can still be used, but it can't be opened by
762 another process, and it will automatically be deleted when it
763 is closed or when the current process terminates.
765 If you want a more permanent file, you derive a class which
766 overrides this method. If you want a visible temporary file
767 that is nevertheless automatically deleted when the script
768 terminates, try defining a __del__ method in a derived class
769 which unlinks the temporary files you have created.
773 return tempfile
.TemporaryFile("w+b")
777 # Backwards Compatibility Classes
778 # ===============================
class FormContentDict(UserDict.UserDict):
    """Dictionary view of a form, holding a list of values for each field.

    Typical use:

        form = FormContentDict()

        form[key] -> [value, value, ...]
        key in form -> Boolean
        form.keys() -> [key, key, ...]
        form.values() -> [[val, val, ...], [val, val, ...], ...]
        form.items() -> [(key, [val, val, ...]), (key, [val, val, ...]), ...]
        form.dict == {key: [val, val, ...], ...}

    The 'QUERY_STRING' entry of the environment, read after parsing, is
    kept in the query_string attribute.
    """

    def __init__(self, environ=os.environ):
        # The mapping returned by parse() is bound, as one shared object,
        # to both the dict and the data attributes.
        fields = parse(environ=environ)
        self.dict = fields
        self.data = fields
        self.query_string = environ['QUERY_STRING']
798 class SvFormContentDict(FormContentDict
):
799 """Form content as dictionary expecting a single value per field.
801 If you only expect a single value for each field, then form[key]
802 will return that single value. It will raise an IndexError if
803 that expectation is not true. If you expect a field to have
804 possible multiple values, then you can use form.getlist(key) to
805 get all of the values. values() and items() are a compromise:
806 they return single strings where there is a single value, and
807 lists of strings otherwise.
def __getitem__(self, key):
    """Return the single value stored for key.

    The lookup in self.dict propagates its own error when key is absent
    (KeyError for a plain dictionary).  IndexError is raised when the
    field holds more than one value, or no value at all.
    """
    values = self.dict[key]
    if len(values) > 1:
        # Call form of raise: same exception and message as the old
        # "raise IndexError, msg" statement, but also accepted by
        # Python 3.
        raise IndexError('expecting a single value')
    return values[0]
def getlist(self, key):
    """Return the whole list of values stored for key, however many there are."""
    return self.dict[key]
818 for value
in self
.dict.values():
820 result
.append(value
[0])
821 else: result
.append(value
)
825 for key
, value
in self
.dict.items():
827 result
.append((key
, value
[0]))
828 else: result
.append((key
, value
))
832 class InterpFormContentDict(SvFormContentDict
):
833 """This class is present for backwards compatibility only."""
834 def __getitem__(self
, key
):
835 v
= SvFormContentDict
.__getitem
__(self
, key
)
836 if v
[0] in '0123456789+-.':
840 except ValueError: pass
844 for key
in self
.keys():
846 result
.append(self
[key
])
848 result
.append(self
.dict[key
])
852 for key
in self
.keys():
854 result
.append((key
, self
[key
]))
856 result
.append((key
, self
.dict[key
]))
860 class FormContent(FormContentDict
):
861 """This class is present for backwards compatibility only."""
862 def values(self
, key
):
863 if key
in self
.dict :return self
.dict[key
]
865 def indexed_value(self
, key
, location
):
867 if len(self
.dict[key
]) > location
:
868 return self
.dict[key
][location
]
871 def value(self
, key
):
872 if key
in self
.dict: return self
.dict[key
][0]
def length(self, key):
    """Return the number of values stored for key."""
    return len(self.dict[key])
876 def stripped(self
, key
):
877 if key
in self
.dict: return self
.dict[key
][0].strip()
886 def test(environ
=os
.environ
):
887 """Robust test CGI script, usable as main program.
889 Write minimal HTTP headers and dump all information provided to
890 the script in HTML form.
893 print "Content-type: text/html"
895 sys
.stderr
= sys
.stdout
897 form
= FieldStorage() # Replace with other classes to test those
901 print_environ(environ
)
902 print_environ_usage()
904 exec "testing print_exception() -- <I>italics?</I>"
907 print "<H3>What follows is a test, not an actual exception:</H3>"
912 print "<H1>Second try with a small maxlen...</H1>"
917 form
= FieldStorage() # Replace with other classes to test those
921 print_environ(environ
)
925 def print_exception(type=None, value
=None, tb
=None, limit
=None):
927 type, value
, tb
= sys
.exc_info()
930 print "<H3>Traceback (most recent call last):</H3>"
931 list = traceback
.format_tb(tb
, limit
) + \
932 traceback
.format_exception_only(type, value
)
933 print "<PRE>%s<B>%s</B></PRE>" % (
934 escape("".join(list[:-1])),
939 def print_environ(environ
=os
.environ
):
940 """Dump the shell environment as HTML."""
941 keys
= environ
.keys()
944 print "<H3>Shell Environment:</H3>"
947 print "<DT>", escape(key
), "<DD>", escape(environ
[key
])
951 def print_form(form
):
952 """Dump the contents of a form as HTML."""
956 print "<H3>Form Contents:</H3>"
958 print "<P>No form fields."
961 print "<DT>" + escape(key
) + ":",
963 print "<i>" + escape(repr(type(value
))) + "</i>"
964 print "<DD>" + escape(repr(value
))
968 def print_directory():
969 """Dump the current directory as HTML."""
971 print "<H3>Current Working Directory:</H3>"
974 except os
.error
, msg
:
975 print "os.error:", escape(str(msg
))
980 def print_arguments():
982 print "<H3>Command Line Arguments:</H3>"
987 def print_environ_usage():
988 """Dump a list of environment variables used by CGI as HTML."""
990 <H3>These environment variables could have been set:</H3>
1000 <LI>GATEWAY_INTERFACE
1018 In addition, HTTP headers sent by the server may be passed in the
1019 environment as well. Here are some common variable names:
1034 def escape(s
, quote
=None):
1035 """Replace special characters '&', '<' and '>' by SGML entities."""
1036 s
= s
.replace("&", "&") # Must be done first!
1037 s
= s
.replace("<", "<")
1038 s
= s
.replace(">", ">")
1040 s
= s
.replace('"', """)
1043 def valid_boundary(s
, _vb_pattern
="^[ -~]{0,200}[!-~]$"):
1045 return re
.match(_vb_pattern
, s
)
1050 # Call test() when this file is run as a script (not imported as a module)
1051 if __name__
== '__main__':