1 #! /usr/local/bin/python
3 # NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4 # intentionally NOT "/usr/bin/env python". On many systems
5 # (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6 # scripts, and /usr/local/bin is the default directory where Python is
7 # installed, so /usr/bin/env would be unable to find python. Granted,
8 # binary installations by Linux vendors often install Python in
9 # /usr/bin. So let those vendors patch cgi.py to match their choice
12 """Support module for CGI (Common Gateway Interface) scripts.
14 This module defines a number of utilities for use by CGI scripts
18 # XXX Perhaps there should be a slimmed version that doesn't contain
19 # all those backwards compatible and debugging classes and functions?
24 # Michael McLay started this module. Steve Majewski changed the
25 # interface to SvFormContentDict and FormContentDict. The multipart
26 # parsing was inspired by code submitted by Andreas Paepcke. Guido van
27 # Rossum rewrote, reformatted and documented the module and is currently
28 # responsible for its maintenance.
43 from StringIO
import StringIO
45 __all__
= ["MiniFieldStorage","FieldStorage","FormContentDict",
46 "SvFormContentDict","InterpFormContentDict","FormContent",
52 logfile
= "" # Filename to log to, if not empty
53 logfp
= None # File object to log to, if not None
55 def initlog(*allargs
):
56 """Write a log message, if there is a log file.
58 Even though this function is called initlog(), you should always
59 use log(); log is a variable that is set either to initlog
60 (initially), to dolog (once the log file has been opened), or to
61 nolog (when logging is disabled).
63 The first argument is a format string; the remaining arguments (if
64 any) are arguments to the % operator, so e.g.
65 log("%s: %s", "a", "b")
66 will write "a: b" to the log file, followed by a newline.
68 If the global logfp is not None, it should be a file object to
69 which log data is written.
71 If the global logfp is None, the global logfile may be a string
72 giving a filename to open, in append mode. This file should be
73 world writable!!! If the file can't be opened, logging is
74 silently disabled (since there is no safe place where we could
75 send an error message).
79 if logfile
and not logfp
:
81 logfp
= open(logfile
, "a")
def dolog(fmt, *args):
    """Format a message and append it, newline-terminated, to the log file.

    fmt is a %-style format string and args are the values substituted
    into it.  The resulting text is written to the module-level logfp
    file object.  See initlog() for the full description of logging.
    """
    message = fmt % args
    logfp.write(message + "\n")
95 """Dummy function, assigned to log when logging is disabled."""
98 log
= initlog
# The current logging function
104 # Maximum input we will accept when REQUEST_METHOD is POST
105 # 0 ==> unlimited input
108 def parse(fp
=None, environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
109 """Parse a query in the environment or from a file (default stdin)
111 Arguments, all optional:
113 fp : file pointer; default: sys.stdin
115 environ : environment dictionary; default: os.environ
117 keep_blank_values: flag indicating whether blank values in
118 URL encoded forms should be treated as blank strings.
119 A true value indicates that blanks should be retained as
120 blank strings. The default false value indicates that
121 blank values are to be ignored and treated as if they were
124 strict_parsing: flag indicating what to do with parsing errors.
125 If false (the default), errors are silently ignored.
126 If true, errors raise a ValueError exception.
130 if not environ
.has_key('REQUEST_METHOD'):
131 environ
['REQUEST_METHOD'] = 'GET' # For testing stand-alone
132 if environ
['REQUEST_METHOD'] == 'POST':
133 ctype
, pdict
= parse_header(environ
['CONTENT_TYPE'])
134 if ctype
== 'multipart/form-data':
135 return parse_multipart(fp
, pdict
)
136 elif ctype
== 'application/x-www-form-urlencoded':
137 clength
= int(environ
['CONTENT_LENGTH'])
138 if maxlen
and clength
> maxlen
:
139 raise ValueError, 'Maximum content length exceeded'
140 qs
= fp
.read(clength
)
142 qs
= '' # Unknown content-type
143 if environ
.has_key('QUERY_STRING'):
145 qs
= qs
+ environ
['QUERY_STRING']
148 qs
= qs
+ sys
.argv
[1]
149 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
150 elif environ
.has_key('QUERY_STRING'):
151 qs
= environ
['QUERY_STRING']
157 environ
['QUERY_STRING'] = qs
# XXX Shouldn't, really
158 return parse_qs(qs
, keep_blank_values
, strict_parsing
)
161 def parse_qs(qs
, keep_blank_values
=0, strict_parsing
=0):
162 """Parse a query given as a string argument.
166 qs: URL-encoded query string to be parsed
168 keep_blank_values: flag indicating whether blank values in
169 URL encoded queries should be treated as blank strings.
170 A true value indicates that blanks should be retained as
171 blank strings. The default false value indicates that
172 blank values are to be ignored and treated as if they were
175 strict_parsing: flag indicating what to do with parsing errors.
176 If false (the default), errors are silently ignored.
177 If true, errors raise a ValueError exception.
180 for name
, value
in parse_qsl(qs
, keep_blank_values
, strict_parsing
):
181 if dict.has_key(name
):
182 dict[name
].append(value
)
187 def parse_qsl(qs
, keep_blank_values
=0, strict_parsing
=0):
188 """Parse a query given as a string argument.
192 qs: URL-encoded query string to be parsed
194 keep_blank_values: flag indicating whether blank values in
195 URL encoded queries should be treated as blank strings. A
196 true value indicates that blanks should be retained as blank
197 strings. The default false value indicates that blank values
198 are to be ignored and treated as if they were not included.
200 strict_parsing: flag indicating what to do with parsing errors. If
201 false (the default), errors are silently ignored. If true,
202 errors raise a ValueError exception.
204 Returns a list, as G-d intended.
206 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
208 for name_value
in pairs
:
209 nv
= name_value
.split('=', 1)
212 raise ValueError, "bad query field: %s" % `name_value`
214 if len(nv
[1]) or keep_blank_values
:
215 name
= urllib
.unquote(nv
[0].replace('+', ' '))
216 value
= urllib
.unquote(nv
[1].replace('+', ' '))
217 r
.append((name
, value
))
222 def parse_multipart(fp
, pdict
):
223 """Parse multipart input.
227 pdict: dictionary containing other parameters of content-type header
229 Returns a dictionary just like parse_qs(): keys are the field names, each
230 value is a list of values for that field. This is easy to use but not
231 much good if you are expecting megabytes to be uploaded -- in that case,
232 use the FieldStorage class instead which is much more flexible. Note
233 that content-type is the raw, unparsed contents of the content-type
236 XXX This does not parse nested multipart parts -- use FieldStorage for
239 XXX This should really be subsumed by FieldStorage altogether -- no
240 point in having two implementations of the same parsing algorithm.
243 if pdict
.has_key('boundary'):
244 boundary
= pdict
['boundary']
247 nextpart
= "--" + boundary
248 lastpart
= "--" + boundary
+ "--"
252 while terminator
!= lastpart
:
256 # At start of next part. Read headers first.
257 headers
= mimetools
.Message(fp
)
258 clength
= headers
.getheader('content-length')
265 if maxlen
and bytes
> maxlen
:
266 raise ValueError, 'Maximum content length exceeded'
267 data
= fp
.read(bytes
)
270 # Read lines until end of part.
275 terminator
= lastpart
# End outer loop
278 terminator
= line
.strip()
279 if terminator
in (nextpart
, lastpart
):
287 # Strip final line terminator
289 if line
[-2:] == "\r\n":
291 elif line
[-1:] == "\n":
294 data
= "".join(lines
)
295 line
= headers
['content-disposition']
298 key
, params
= parse_header(line
)
299 if key
!= 'form-data':
301 if params
.has_key('name'):
302 name
= params
['name']
305 if partdict
.has_key(name
):
306 partdict
[name
].append(data
)
308 partdict
[name
] = [data
]
313 def parse_header(line
):
314 """Parse a Content-type like header.
316 Return the main content-type and a dictionary of options.
319 plist
= map(lambda x
: x
.strip(), line
.split(';'))
320 key
= plist
[0].lower()
326 name
= p
[:i
].strip().lower()
327 value
= p
[i
+1:].strip()
328 if len(value
) >= 2 and value
[0] == value
[-1] == '"':
334 # Classes for field storage
335 # =========================
337 class MiniFieldStorage
:
339 """Like FieldStorage, for use when no file uploads are possible."""
348 disposition_options
= {}
351 def __init__(self
, name
, value
):
352 """Constructor from field name and value."""
355 # self.file = StringIO(value)
358 """Return printable representation."""
359 return "MiniFieldStorage(%s, %s)" % (`self
.name`
, `self
.value`
)
364 """Store a sequence of fields, reading multipart/form-data.
366 This class provides naming, typing, files stored on disk, and
367 more. At the top level, it is accessible like a dictionary, whose
368 keys are the field names. (Note: None can occur as a field name.)
369 The items are either a Python list (if there's multiple values) or
370 another FieldStorage or MiniFieldStorage object. If it's a single
371 object, it has the following attributes:
373 name: the field name, if specified; otherwise None
375 filename: the filename, if specified; otherwise None; this is the
376 client side filename, *not* the file name on which it is
377 stored (that's a temporary file you don't deal with)
379 value: the value as a *string*; for file uploads, this
380 transparently reads the file every time you request the value
382 file: the file(-like) object from which you can read the data;
383 None if the data is stored as a simple string
385 type: the content-type, or None if not specified
387 type_options: dictionary of options specified on the content-type
390 disposition: content-disposition, or None if not specified
392 disposition_options: dictionary of corresponding options
394 headers: a dictionary(-like) object (sometimes rfc822.Message or a
395 subclass thereof) containing *all* headers
397 The class is subclassable, mostly for the purpose of overriding
398 the make_file() method, which is called internally to come up with
399 a file open for reading and writing. This makes it possible to
400 override the default choice of storing all files in a temporary
401 directory and unlinking them as soon as they have been opened.
405 def __init__(self
, fp
=None, headers
=None, outerboundary
="",
406 environ
=os
.environ
, keep_blank_values
=0, strict_parsing
=0):
407 """Constructor. Read multipart/* until last part.
409 Arguments, all optional:
411 fp : file pointer; default: sys.stdin
412 (not used when the request method is GET)
414 headers : header dictionary-like object; default:
415 taken from environ as per CGI spec
417 outerboundary : terminating multipart boundary
418 (for internal use only)
420 environ : environment dictionary; default: os.environ
422 keep_blank_values: flag indicating whether blank values in
423 URL encoded forms should be treated as blank strings.
424 A true value indicates that blanks should be retained as
425 blank strings. The default false value indicates that
426 blank values are to be ignored and treated as if they were
429 strict_parsing: flag indicating what to do with parsing errors.
430 If false (the default), errors are silently ignored.
431 If true, errors raise a ValueError exception.
435 self
.keep_blank_values
= keep_blank_values
436 self
.strict_parsing
= strict_parsing
437 if environ
.has_key('REQUEST_METHOD'):
438 method
= environ
['REQUEST_METHOD'].upper()
439 if method
== 'GET' or method
== 'HEAD':
440 if environ
.has_key('QUERY_STRING'):
441 qs
= environ
['QUERY_STRING']
448 headers
= {'content-type':
449 "application/x-www-form-urlencoded"}
453 # Set default content-type for POST to what's traditional
454 headers
['content-type'] = "application/x-www-form-urlencoded"
455 if environ
.has_key('CONTENT_TYPE'):
456 headers
['content-type'] = environ
['CONTENT_TYPE']
457 if environ
.has_key('CONTENT_LENGTH'):
458 headers
['content-length'] = environ
['CONTENT_LENGTH']
459 self
.fp
= fp
or sys
.stdin
460 self
.headers
= headers
461 self
.outerboundary
= outerboundary
463 # Process content-disposition header
464 cdisp
, pdict
= "", {}
465 if self
.headers
.has_key('content-disposition'):
466 cdisp
, pdict
= parse_header(self
.headers
['content-disposition'])
467 self
.disposition
= cdisp
468 self
.disposition_options
= pdict
470 if pdict
.has_key('name'):
471 self
.name
= pdict
['name']
473 if pdict
.has_key('filename'):
474 self
.filename
= pdict
['filename']
476 # Process content-type header
478 # Honor any existing content-type header. But if there is no
479 # content-type header, use some sensible defaults. Assume
480 # outerboundary is "" at the outer level, but something non-false
481 # inside a multi-part. The default for an inner part is text/plain,
482 # but for an outer part it should be urlencoded. This should catch
483 # bogus clients which erroneously forget to include a content-type
486 # See below for what we do if there does exist a content-type header,
487 # but it happens to be something we don't understand.
488 if self
.headers
.has_key('content-type'):
489 ctype
, pdict
= parse_header(self
.headers
['content-type'])
490 elif self
.outerboundary
or method
!= 'POST':
491 ctype
, pdict
= "text/plain", {}
493 ctype
, pdict
= 'application/x-www-form-urlencoded', {}
495 self
.type_options
= pdict
496 self
.innerboundary
= ""
497 if pdict
.has_key('boundary'):
498 self
.innerboundary
= pdict
['boundary']
500 if self
.headers
.has_key('content-length'):
502 clen
= int(self
.headers
['content-length'])
505 if maxlen
and clen
> maxlen
:
506 raise ValueError, 'Maximum content length exceeded'
509 self
.list = self
.file = None
511 if ctype
== 'application/x-www-form-urlencoded':
512 self
.read_urlencoded()
513 elif ctype
[:10] == 'multipart/':
514 self
.read_multi(environ
, keep_blank_values
, strict_parsing
)
519 """Return a printable representation."""
520 return "FieldStorage(%s, %s, %s)" % (
521 `self
.name`
, `self
.filename`
, `self
.value`
)
523 def __getattr__(self
, name
):
525 raise AttributeError, name
528 value
= self
.file.read()
530 elif self
.list is not None:
536 def __getitem__(self
, key
):
537 """Dictionary style indexing."""
538 if self
.list is None:
539 raise TypeError, "not indexable"
541 for item
in self
.list:
542 if item
.name
== key
: found
.append(item
)
550 def getvalue(self
, key
, default
=None):
551 """Dictionary style get() method, including 'value' lookup."""
552 if self
.has_key(key
):
554 if type(value
) is type([]):
555 return map(lambda v
: v
.value
, value
)
562 """Dictionary style keys() method."""
563 if self
.list is None:
564 raise TypeError, "not indexable"
566 for item
in self
.list:
567 if item
.name
not in keys
: keys
.append(item
.name
)
570 def has_key(self
, key
):
571 """Dictionary style has_key() method."""
572 if self
.list is None:
573 raise TypeError, "not indexable"
574 for item
in self
.list:
575 if item
.name
== key
: return 1
579 """Dictionary style len(x) support."""
580 return len(self
.keys())
582 def read_urlencoded(self
):
583 """Internal: read data in query string format."""
584 qs
= self
.fp
.read(self
.length
)
585 self
.list = list = []
586 for key
, value
in parse_qsl(qs
, self
.keep_blank_values
,
587 self
.strict_parsing
):
588 list.append(MiniFieldStorage(key
, value
))
591 FieldStorageClass
= None
593 def read_multi(self
, environ
, keep_blank_values
, strict_parsing
):
594 """Internal: read a part that is itself multipart."""
596 klass
= self
.FieldStorageClass
or self
.__class
__
597 part
= klass(self
.fp
, {}, self
.innerboundary
,
598 environ
, keep_blank_values
, strict_parsing
)
599 # Throw first part away
601 headers
= rfc822
.Message(self
.fp
)
602 part
= klass(self
.fp
, headers
, self
.innerboundary
,
603 environ
, keep_blank_values
, strict_parsing
)
604 self
.list.append(part
)
607 def read_single(self
):
608 """Internal: read an atomic part."""
616 bufsize
= 8*1024 # I/O buffering size for copy to file
618 def read_binary(self
):
619 """Internal: read binary data."""
620 self
.file = self
.make_file('b')
624 data
= self
.fp
.read(min(todo
, self
.bufsize
))
628 self
.file.write(data
)
629 todo
= todo
- len(data
)
631 def read_lines(self
):
632 """Internal: read lines until EOF or outerboundary."""
633 self
.file = self
.make_file('')
634 if self
.outerboundary
:
635 self
.read_lines_to_outerboundary()
637 self
.read_lines_to_eof()
639 def read_lines_to_eof(self
):
640 """Internal: read lines until EOF."""
642 line
= self
.fp
.readline()
646 self
.file.write(line
)
648 def read_lines_to_outerboundary(self
):
649 """Internal: read lines until outerboundary."""
650 next
= "--" + self
.outerboundary
654 line
= self
.fp
.readline()
659 strippedline
= line
.strip()
660 if strippedline
== next
:
662 if strippedline
== last
:
666 if line
[-2:] == "\r\n":
669 elif line
[-1] == "\n":
674 self
.file.write(odelim
+ line
)
676 def skip_lines(self
):
677 """Internal: skip lines until outer boundary if defined."""
678 if not self
.outerboundary
or self
.done
:
680 next
= "--" + self
.outerboundary
683 line
= self
.fp
.readline()
688 strippedline
= line
.strip()
689 if strippedline
== next
:
691 if strippedline
== last
:
695 def make_file(self
, binary
=None):
696 """Overridable: return a readable & writable file.
698 The file will be used as follows:
699 - data is written to it
701 - data is read from it
703 The 'binary' argument is unused -- the file is always opened
706 This version opens a temporary file for reading and writing,
707 and immediately deletes (unlinks) it. The trick (on Unix!) is
708 that the file can still be used, but it can't be opened by
709 another process, and it will automatically be deleted when it
710 is closed or when the current process terminates.
712 If you want a more permanent file, you derive a class which
713 overrides this method. If you want a visible temporary file
714 that is nevertheless automatically deleted when the script
715 terminates, try defining a __del__ method in a derived class
716 which unlinks the temporary files you have created.
720 return tempfile
.TemporaryFile("w+b")
724 # Backwards Compatibility Classes
725 # ===============================
class FormContentDict(UserDict.UserDict):
    """Dictionary view of a form in which every field maps to a list.

    form = FormContentDict()

    form[key] -> [value, value, ...]
    form.has_key(key) -> Boolean
    form.keys() -> [key, key, ...]
    form.values() -> [[val, val, ...], [val, val, ...], ...]
    form.items() ->  [(key, [val, val, ...]), (key, [val, val, ...]), ...]
    form.dict == {key: [val, val, ...], ...}

    """

    def __init__(self, environ=os.environ):
        """Parse the form described by environ and keep the result."""
        # self.dict and self.data are two names for the same parsed mapping.
        fields = parse(environ=environ)
        self.dict = fields
        self.data = fields
        # parse() stores the query string it used back into environ.
        self.query_string = environ['QUERY_STRING']
745 class SvFormContentDict(FormContentDict
):
746 """Form content as dictionary expecting a single value per field.
748 If you only expect a single value for each field, then form[key]
749 will return that single value. It will raise an IndexError if
750 that expectation is not true. If you expect a field to have
751 possibly multiple values, then you can use form.getlist(key) to
752 get all of the values. values() and items() are a compromise:
753 they return single strings where there is a single value, and
754 lists of strings otherwise.
757 def __getitem__(self
, key
):
758 if len(self
.dict[key
]) > 1:
759 raise IndexError, 'expecting a single value'
760 return self
.dict[key
][0]
761 def getlist(self
, key
):
762 return self
.dict[key
]
765 for value
in self
.dict.values():
767 result
.append(value
[0])
768 else: result
.append(value
)
772 for key
, value
in self
.dict.items():
774 result
.append((key
, value
[0]))
775 else: result
.append((key
, value
))
779 class InterpFormContentDict(SvFormContentDict
):
780 """This class is present for backwards compatibility only."""
781 def __getitem__(self
, key
):
782 v
= SvFormContentDict
.__getitem
__(self
, key
)
783 if v
[0] in '0123456789+-.':
787 except ValueError: pass
791 for key
in self
.keys():
793 result
.append(self
[key
])
795 result
.append(self
.dict[key
])
799 for key
in self
.keys():
801 result
.append((key
, self
[key
]))
803 result
.append((key
, self
.dict[key
]))
807 class FormContent(FormContentDict
):
808 """This class is present for backwards compatibility only."""
809 def values(self
, key
):
810 if self
.dict.has_key(key
) :return self
.dict[key
]
812 def indexed_value(self
, key
, location
):
813 if self
.dict.has_key(key
):
814 if len(self
.dict[key
]) > location
:
815 return self
.dict[key
][location
]
818 def value(self
, key
):
819 if self
.dict.has_key(key
): return self
.dict[key
][0]
821 def length(self
, key
):
822 return len(self
.dict[key
])
823 def stripped(self
, key
):
824 if self
.dict.has_key(key
): return self
.dict[key
][0].strip()
833 def test(environ
=os
.environ
):
834 """Robust test CGI script, usable as main program.
836 Write minimal HTTP headers and dump all information provided to
837 the script in HTML form.
841 print "Content-type: text/html"
843 sys
.stderr
= sys
.stdout
845 form
= FieldStorage() # Replace with other classes to test those
849 print_environ(environ
)
850 print_environ_usage()
852 exec "testing print_exception() -- <I>italics?</I>"
855 print "<H3>What follows is a test, not an actual exception:</H3>"
860 print "<H1>Second try with a small maxlen...</H1>"
865 form
= FieldStorage() # Replace with other classes to test those
869 print_environ(environ
)
873 def print_exception(type=None, value
=None, tb
=None, limit
=None):
875 type, value
, tb
= sys
.exc_info()
878 print "<H3>Traceback (most recent call last):</H3>"
879 list = traceback
.format_tb(tb
, limit
) + \
880 traceback
.format_exception_only(type, value
)
881 print "<PRE>%s<B>%s</B></PRE>" % (
882 escape("".join(list[:-1])),
887 def print_environ(environ
=os
.environ
):
888 """Dump the shell environment as HTML."""
889 keys
= environ
.keys()
892 print "<H3>Shell Environment:</H3>"
895 print "<DT>", escape(key
), "<DD>", escape(environ
[key
])
899 def print_form(form
):
900 """Dump the contents of a form as HTML."""
904 print "<H3>Form Contents:</H3>"
906 print "<P>No form fields."
909 print "<DT>" + escape(key
) + ":",
911 print "<i>" + escape(`
type(value
)`
) + "</i>"
912 print "<DD>" + escape(`value`
)
916 def print_directory():
917 """Dump the current directory as HTML."""
919 print "<H3>Current Working Directory:</H3>"
922 except os
.error
, msg
:
923 print "os.error:", escape(str(msg
))
928 def print_arguments():
930 print "<H3>Command Line Arguments:</H3>"
935 def print_environ_usage():
936 """Dump a list of environment variables used by CGI as HTML."""
938 <H3>These environment variables could have been set:</H3>
948 <LI>GATEWAY_INTERFACE
966 In addition, HTTP headers sent by the server may be passed in the
967 environment as well. Here are some common variable names:
def escape(s, quote=None):
    """Replace special characters '&', '<' and '>' by SGML entities.

    s is the string to escape; the escaped copy is returned.  If the
    optional flag quote is true, the double-quote character (") is
    replaced by its entity as well, so the result can be placed inside
    a double-quoted attribute value.
    """
    # '&' must be handled first, otherwise the ampersands introduced by
    # the later replacements would themselves be escaped again.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s
995 # Call test() when this file is run as a script (not imported as a module)
996 if __name__
== '__main__':