1 # Copyright (C) 2001,2002 Python Software Foundation
2 # Author: barry@zope.com (Barry Warsaw)
4 """Basic message object for the email package object model.
11 from cStringIO
import StringIO
12 from types
import ListType
, TupleType
, StringType
14 # Intrapackage imports
15 from email
import Utils
16 from email
import Errors
17 from email
import Charset
# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full fledged
# parser.
#
# The pattern matches one semicolon together with any whitespace around it,
# so splitting a header value on it yields the individual parameters.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.  A single match
# anywhere in the value is enough to require quoting.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
39 def _formatparam(param
, value
=None, quote
=True):
40 """Convenience function to format and return a key=value pair.
42 This will quote the value if needed or if quote is true.
44 if value
is not None and len(value
) > 0:
45 # TupleType is used for RFC 2231 encoded parameter values where items
46 # are (charset, language, value). charset is a string, not a Charset
48 if isinstance(value
, TupleType
):
49 # Encode as per RFC 2231
51 value
= Utils
.encode_rfc2231(value
[2], value
[0], value
[1])
52 # BAW: Please check this. I think that if quote is set it should
53 # force quoting even if not necessary.
54 if quote
or tspecials
.search(value
):
55 return '%s="%s"' % (param
, Utils
.quote(value
))
57 return '%s=%s' % (param
, value
)
66 while end
> 0 and s
.count('"', 0, end
) % 2:
67 end
= s
.find(';', end
+ 1)
73 f
= f
[:i
].strip().lower() + '=' + f
[i
+1:].strip()
74 plist
.append(f
.strip())
def _unquotevalue(value):
    """Remove quoting from a parameter value.

    value is either a plain value or a 3-tuple.  For a tuple, the first two
    items are returned untouched and only the third item is passed through
    Utils.unquote(); the result is again a 3-tuple.  Any other value is
    handed to Utils.unquote() directly and that result is returned.
    """
    if isinstance(value, TupleType):
        return value[0], value[1], Utils.unquote(value[2])
    return Utils.unquote(value)
88 """Basic message object.
90 A message object is defined as something that has a bunch of RFC 2822
91 headers and a payload. It may optionally have an envelope header
92 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
93 multipart or a message/rfc822), then the payload is a list of Message
94 objects, otherwise it is a string.
96 Message objects implement part of the `mapping' interface, which assumes
97 there is exactly one occurrence of the header per message. Some headers
98 do in fact appear multiple times (e.g. Received) and for those headers,
99 you must use the explicit API to set or get all the headers. Not all of
100 the mapping methods are implemented.
104 self
._unixfrom
= None
107 # Defaults for multipart messages
108 self
.preamble
= self
.epilogue
= None
109 # Default content type
110 self
._default
_type
= 'text/plain'
113 """Return the entire formatted message as a string.
114 This includes the headers, body, and envelope header.
116 return self
.as_string(unixfrom
=True)
118 def as_string(self
, unixfrom
=False):
119 """Return the entire formatted message as a string.
120 Optional `unixfrom' when True, means include the Unix From_ envelope
123 This is a convenience method and may not generate the message exactly
124 as you intend. For more flexibility, use the flatten() method of a
127 from email
.Generator
import Generator
130 g
.flatten(self
, unixfrom
=unixfrom
)
133 def is_multipart(self
):
134 """Return True if the message consists of multiple parts."""
135 if isinstance(self
._payload
, ListType
):
def set_unixfrom(self, unixfrom):
    """Set the message's envelope header (the Unix From_ line).

    The value is stored as given and replaces any previously stored one.
    """
    self._unixfrom = unixfrom
def get_unixfrom(self):
    """Return the message's envelope header (the Unix From_ line).

    This is whatever value is currently stored in self._unixfrom.
    """
    return self._unixfrom
149 # Payload manipulation.
def add_payload(self, payload):
    """Add the given payload to the current payload.

    If the current payload is empty, then the current payload will be made
    a scalar, set to the given value.  If it is already a list, the new
    payload is appended to that list.  Otherwise the existing scalar and
    the new payload are combined into a two element list, which is only
    allowed when get_main_type() returns None or 'multipart';
    Errors.MultipartConversionError is raised for any other main type.

    Note: This method is deprecated.  Use .attach() instead.
    """
    # Stack level 2 attributes the warning to the caller, not this method.
    warnings.warn('add_payload() is deprecated, use attach() instead.',
                  DeprecationWarning, 2)
    if self._payload is None:
        self._payload = payload
    elif isinstance(self._payload, ListType):
        self._payload.append(payload)
    elif self.get_main_type() not in (None, 'multipart'):
        raise Errors.MultipartConversionError(
            'Message main content type must be "multipart" or missing')
    else:
        self._payload = [self._payload, payload]
def attach(self, payload):
    """Add the given payload to the current payload.

    The current payload will always be a list of objects after this method
    is called.  If you want to set the payload to a scalar object, use
    set_payload() instead.
    """
    if self._payload is None:
        # First attachment: start a fresh list.
        self._payload = [payload]
    else:
        self._payload.append(payload)
183 def get_payload(self
, i
=None, decode
=False):
184 """Return a reference to the payload.
186 The payload will either be a list object or a string. If you mutate
187 the list object, you modify the message's payload in place. Optional
188 i returns that index into the payload.
190 Optional decode is a flag indicating whether the payload should be
191 decoded or not, according to the Content-Transfer-Encoding header
194 When True and the message is not a multipart, the payload will be
195 decoded if this header's value is `quoted-printable' or `base64'. If
196 some other encoding is used, or the header is missing, or if the
197 payload has bogus data (i.e. bogus base64 or uuencoded data), the
198 payload is returned as-is.
200 If the message is a multipart and the decode flag is True, then None
204 payload
= self
._payload
205 elif not isinstance(self
._payload
, ListType
):
206 raise TypeError, 'Expected list, got %s' % type(self
._payload
)
208 payload
= self
._payload
[i
]
210 if self
.is_multipart():
212 cte
= self
.get('content-transfer-encoding', '').lower()
213 if cte
== 'quoted-printable':
214 return Utils
._qdecode
(payload
)
215 elif cte
== 'base64':
217 return Utils
._bdecode
(payload
)
218 except binascii
.Error
:
221 elif cte
in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
224 uu
.decode(StringIO(payload
+'\n'), sfp
)
225 payload
= sfp
.getvalue()
227 # Some decoding problem
229 # Everything else, including encodings with 8bit or 7bit are returned
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset sets the message's default character set.  See
    set_charset() for details.  When charset is None (the default),
    set_charset() is not called at all.
    """
    self._payload = payload
    if charset is not None:
        self.set_charset(charset)
243 def set_charset(self
, charset
):
244 """Set the charset of the payload to a given character set.
246 charset can be a Charset instance, a string naming a character set, or
247 None. If it is a string it will be converted to a Charset instance.
248 If charset is None, the charset parameter will be removed from the
249 Content-Type field. Anything else will generate a TypeError.
251 The message will be assumed to be of type text/* encoded with
252 charset.input_charset. It will be converted to charset.output_charset
253 and encoded properly, if needed, when generating the plain text
254 representation of the message. MIME headers (MIME-Version,
255 Content-Type, Content-Transfer-Encoding) will be added as needed.
259 self
.del_param('charset')
262 if isinstance(charset
, StringType
):
263 charset
= Charset
.Charset(charset
)
264 if not isinstance(charset
, Charset
.Charset
):
265 raise TypeError, charset
266 # BAW: should we accept strings that can serve as arguments to the
267 # Charset constructor?
268 self
._charset
= charset
269 if not self
.has_key('MIME-Version'):
270 self
.add_header('MIME-Version', '1.0')
271 if not self
.has_key('Content-Type'):
272 self
.add_header('Content-Type', 'text/plain',
273 charset
=charset
.get_output_charset())
275 self
.set_param('charset', charset
.get_output_charset())
276 if not self
.has_key('Content-Transfer-Encoding'):
277 cte
= charset
.get_body_encoding()
281 self
.add_header('Content-Transfer-Encoding', cte
)
283 def get_charset(self
):
284 """Return the Charset instance associated with the message's payload.
289 # MAPPING INTERFACE (partial)
292 """Return the total number of headers, including duplicates."""
293 return len(self
._headers
)
def __getitem__(self, name):
    """Get a header value.

    Return None if the header is missing instead of raising an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    # Delegates to get() with its default failobj.
    return self.get(name)
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.
    """
    # Always appended, so repeated assignment yields duplicate headers.
    self._headers.append((name, val))
314 def __delitem__(self
, name
):
315 """Delete all occurrences of a header, if present.
317 Does not raise an exception if the header is missing.
321 for k
, v
in self
._headers
:
322 if k
.lower() <> name
:
323 newheaders
.append((k
, v
))
324 self
._headers
= newheaders
def __contains__(self, name):
    """Return true if the message has a header called `name'.

    Header field names are compared case insensitively.
    """
    # Lowercase the requested name once and stop at the first match rather
    # than building a list of every lowercased field name.
    wanted = name.lower()
    for field, _ in self._headers:
        if field.lower() == wanted:
            return True
    return False
329 def has_key(self
, name
):
330 """Return true if the message contains the header."""
332 return self
.get(name
, missing
) is not missing
335 """Return a list of all the message's header field names.
337 These will be sorted in the order they appeared in the original
338 message, or were added to the message, and may contain duplicates.
339 Any fields deleted and re-inserted are always appended to the header
342 return [k
for k
, v
in self
._headers
]
345 """Return a list of all the message's header values.
347 These will be sorted in the order they appeared in the original
348 message, or were added to the message, and may contain duplicates.
349 Any fields deleted and re-inserted are always appended to the header
352 return [v
for k
, v
in self
._headers
]
355 """Get all the message's header fields and values.
357 These will be sorted in the order they appeared in the original
358 message, or were added to the message, and may contain duplicates.
359 Any fields deleted and re-inserted are always appended to the header
362 return self
._headers
[:]
364 def get(self
, name
, failobj
=None):
365 """Get a header value.
367 Like __getitem__() but return failobj instead of None when the field
371 for k
, v
in self
._headers
:
372 if k
.lower() == name
:
377 # Additional useful stuff
380 def get_all(self
, name
, failobj
=None):
381 """Return a list of all the values for the named field.
383 These will be sorted in the order they appeared in the original
384 message, and may contain duplicates. Any fields deleted and
385 re-inserted are always appended to the header list.
387 If no such fields exist, failobj is returned (defaults to None).
391 for k
, v
in self
._headers
:
392 if k
.lower() == name
:
398 def add_header(self
, _name
, _value
, **_params
):
399 """Extended header setting.
401 name is the header field to add. keyword arguments can be used to set
402 additional parameters for the header field, with underscores converted
403 to dashes. Normally the parameter will be added as key="value" unless
404 value is None, in which case only the key will be added.
408 msg.add_header('content-disposition', 'attachment', filename='bud.gif')
411 for k
, v
in _params
.items():
413 parts
.append(k
.replace('_', '-'))
415 parts
.append(_formatparam(k
.replace('_', '-'), v
))
416 if _value
is not None:
417 parts
.insert(0, _value
)
418 self
._headers
.append((_name
, SEMISPACE
.join(parts
)))
420 def replace_header(self
, _name
, _value
):
423 Replace the first matching header found in the message, retaining
424 header order and case. If no matching header was found, a KeyError is
427 _name
= _name
.lower()
428 for i
, (k
, v
) in zip(range(len(self
._headers
)), self
._headers
):
429 if k
.lower() == _name
:
430 self
._headers
[i
] = (k
, _value
)
433 raise KeyError, _name
436 # These methods are silently deprecated in favor of get_content_type() and
437 # friends (see below). They will be noisily deprecated in email 3.0.
440 def get_type(self
, failobj
=None):
441 """Returns the message's content type.
443 The returned string is coerced to lowercase and returned as a single
444 string of the form `maintype/subtype'. If there was no Content-Type
445 header in the message, failobj is returned (defaults to None).
448 value
= self
.get('content-type', missing
)
451 return paramre
.split(value
)[0].lower().strip()
453 def get_main_type(self
, failobj
=None):
454 """Return the message's main content type if present."""
456 ctype
= self
.get_type(missing
)
459 if ctype
.count('/') <> 1:
461 return ctype
.split('/')[0]
463 def get_subtype(self
, failobj
=None):
464 """Return the message's content subtype if present."""
466 ctype
= self
.get_type(missing
)
469 if ctype
.count('/') <> 1:
471 return ctype
.split('/')[1]
474 # Use these three methods instead of the three above.
477 def get_content_type(self
):
478 """Return the message's content type.
480 The returned string is coerced to lower case of the form
481 `maintype/subtype'. If there was no Content-Type header in the
482 message, the default type as given by get_default_type() will be
483 returned. Since according to RFC 2045, messages always have a default
484 type this will always return a value.
486 RFC 2045 defines a message's default type to be text/plain unless it
487 appears inside a multipart/digest container, in which case it would be
491 value
= self
.get('content-type', missing
)
493 # This should have no parameters
494 return self
.get_default_type()
495 ctype
= paramre
.split(value
)[0].lower().strip()
496 # RFC 2045, section 5.2 says if it's invalid, use text/plain
497 if ctype
.count('/') <> 1:
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type(), i.e. everything before the first `/'.
    """
    return self.get_content_type().split('/')[0]
def get_content_subtype(self):
    """Return the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type(), i.e. the piece following the first `/'.
    """
    return self.get_content_type().split('/')[1]
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.

    The value returned is whatever is currently stored on the message.
    """
    return self._default_type
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The value is only recorded on the message object;
    this method does not add or modify any header.
    """
    self._default_type = ctype
537 def _get_params_preserve(self
, failobj
, header
):
538 # Like get_params() but preserves the quoting of values. BAW:
539 # should this be part of the public interface?
541 value
= self
.get(header
, missing
)
545 for p
in _parseparam(';' + value
):
547 name
, val
= p
.split('=', 1)
551 # Must have been a bare attribute
554 params
.append((name
, val
))
555 params
= Utils
.decode_params(params
)
558 def get_params(self
, failobj
=None, header
='content-type', unquote
=True):
559 """Return the message's Content-Type parameters, as a list.
561 The elements of the returned list are 2-tuples of key/value pairs, as
562 split on the `=' sign. The left hand side of the `=' is the key,
563 while the right hand side is the value. If there is no `=' sign in
564 the parameter the value is the empty string. The value is as
565 described in the get_param() method.
567 Optional failobj is the object to return if there is no Content-Type
568 header. Optional header is the header to search instead of
569 Content-Type. If unquote is True, the value is unquoted.
572 params
= self
._get
_params
_preserve
(missing
, header
)
573 if params
is missing
:
576 return [(k
, _unquotevalue(v
)) for k
, v
in params
]
580 def get_param(self
, param
, failobj
=None, header
='content-type',
582 """Return the parameter value if found in the Content-Type header.
584 Optional failobj is the object to return if there is no Content-Type
585 header, or the Content-Type header has no such parameter. Optional
586 header is the header to search instead of Content-Type.
588 Parameter keys are always compared case insensitively. The return
589 value can either be a string, or a 3-tuple if the parameter was RFC
590 2231 encoded. When it's a 3-tuple, the elements of the value are of
591 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
592 LANGUAGE can be None, in which case you should consider VALUE to be
593 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
595 Your application should be prepared to deal with 3-tuple return
596 values, and can convert the parameter to a Unicode string like so:
598 param = msg.get_param('foo')
599 if isinstance(param, tuple):
600 param = unicode(param[2], param[0] or 'us-ascii')
602 In any case, the parameter value (either the returned string, or the
603 VALUE item in the 3-tuple) is always unquoted, unless unquote is set
606 if not self
.has_key(header
):
608 for k
, v
in self
._get
_params
_preserve
(failobj
, header
):
609 if k
.lower() == param
.lower():
611 return _unquotevalue(v
)
616 def set_param(self
, param
, value
, header
='Content-Type', requote
=True,
617 charset
=None, language
=''):
618 """Set a parameter in the Content-Type header.
620 If the parameter already exists in the header, its value will be
621 replaced with the new value.
623 If header is Content-Type and has not yet been defined for this
624 message, it will be set to "text/plain" and the new parameter and
625 value will be appended as per RFC 2045.
627 An alternate header can be specified in the header argument, and all
628 parameters will be quoted as necessary unless requote is False.
630 If charset is specified, the parameter will be encoded according to RFC
631 2231. Optional language specifies the RFC 2231 language, defaulting
632 to the empty string. Both charset and language should be strings.
634 if not isinstance(value
, TupleType
) and charset
:
635 value
= (charset
, language
, value
)
637 if not self
.has_key(header
) and header
.lower() == 'content-type':
640 ctype
= self
.get(header
)
641 if not self
.get_param(param
, header
=header
):
643 ctype
= _formatparam(param
, value
, requote
)
645 ctype
= SEMISPACE
.join(
646 [ctype
, _formatparam(param
, value
, requote
)])
649 for old_param
, old_value
in self
.get_params(header
=header
,
652 if old_param
.lower() == param
.lower():
653 append_param
= _formatparam(param
, value
, requote
)
655 append_param
= _formatparam(old_param
, old_value
, requote
)
659 ctype
= SEMISPACE
.join([ctype
, append_param
])
660 if ctype
<> self
.get(header
):
664 def del_param(self
, param
, header
='content-type', requote
=True):
665 """Remove the given parameter completely from the Content-Type header.
667 The header will be re-written in place without the parameter or its
668 value. All values will be quoted as necessary unless requote is
669 False. Optional header specifies an alternative to the Content-Type
672 if not self
.has_key(header
):
675 for p
, v
in self
.get_params(header
, unquote
=requote
):
676 if p
.lower() <> param
.lower():
678 new_ctype
= _formatparam(p
, v
, requote
)
680 new_ctype
= SEMISPACE
.join([new_ctype
,
681 _formatparam(p
, v
, requote
)])
682 if new_ctype
<> self
.get(header
):
684 self
[header
] = new_ctype
686 def set_type(self
, type, header
='Content-Type', requote
=True):
687 """Set the main type and subtype for the Content-Type header.
689 type must be a string in the form "maintype/subtype", otherwise a
690 ValueError is raised.
692 This method replaces the Content-Type header, keeping all the
693 parameters in place. If requote is False, this leaves the existing
694 header's quoting as is. Otherwise, the parameters will be quoted (the
697 An alternative header can be specified in the header argument. When
698 the Content-Type header is set, we'll always also add a MIME-Version
701 # BAW: should we be strict?
702 if not type.count('/') == 1:
704 # Set the Content-Type, you get a MIME-Version
705 if header
.lower() == 'content-type':
706 del self
['mime-version']
707 self
['MIME-Version'] = '1.0'
708 if not self
.has_key(header
):
711 params
= self
.get_params(header
, unquote
=requote
)
714 # Skip the first param; it's the old type.
715 for p
, v
in params
[1:]:
716 self
.set_param(p
, v
, header
, requote
)
718 def get_filename(self
, failobj
=None):
719 """Return the filename associated with the payload if present.
721 The filename is extracted from the Content-Disposition header's
722 `filename' parameter, and it is unquoted.
725 filename
= self
.get_param('filename', missing
, 'content-disposition')
726 if filename
is missing
:
728 if isinstance(filename
, TupleType
):
729 # It's an RFC 2231 encoded parameter
730 newvalue
= _unquotevalue(filename
)
731 return unicode(newvalue
[2], newvalue
[0] or 'us-ascii')
733 newvalue
= _unquotevalue(filename
.strip())
736 def get_boundary(self
, failobj
=None):
737 """Return the boundary associated with the payload if present.
739 The boundary is extracted from the Content-Type header's `boundary'
740 parameter, and it is unquoted.
743 boundary
= self
.get_param('boundary', missing
)
744 if boundary
is missing
:
746 if isinstance(boundary
, TupleType
):
747 # RFC 2231 encoded, so decode. It better end up as ascii
748 charset
= boundary
[0] or 'us-ascii'
749 return unicode(boundary
[2], charset
).encode('us-ascii')
750 return _unquotevalue(boundary
.strip())
752 def set_boundary(self
, boundary
):
753 """Set the boundary parameter in Content-Type to 'boundary'.
755 This is subtly different than deleting the Content-Type header and
756 adding a new one with a new boundary parameter via add_header(). The
757 main difference is that using the set_boundary() method preserves the
758 order of the Content-Type header in the original message.
760 HeaderParseError is raised if the message has no Content-Type header.
763 params
= self
._get
_params
_preserve
(missing
, 'content-type')
764 if params
is missing
:
765 # There was no Content-Type header, and we don't know what type
766 # to set it to, so raise an exception.
767 raise Errors
.HeaderParseError
, 'No Content-Type header found'
770 for pk
, pv
in params
:
771 if pk
.lower() == 'boundary':
772 newparams
.append(('boundary', '"%s"' % boundary
))
775 newparams
.append((pk
, pv
))
777 # The original Content-Type header had no boundary attribute.
778 # Tack one on the end. BAW: should we raise an exception
780 newparams
.append(('boundary', '"%s"' % boundary
))
781 # Replace the existing Content-Type header with the new value
783 for h
, v
in self
._headers
:
784 if h
.lower() == 'content-type':
786 for k
, v
in newparams
:
790 parts
.append('%s=%s' % (k
, v
))
791 newheaders
.append((h
, SEMISPACE
.join(parts
)))
794 newheaders
.append((h
, v
))
795 self
._headers
= newheaders
798 from email
._compat
22 import walk
800 # Must be using Python 2.1
801 from email
._compat
21 import walk
803 def get_content_charset(self
, failobj
=None):
804 """Return the charset parameter of the Content-Type header.
806 The returned string is always coerced to lower case. If there is no
807 Content-Type header, or if that header has no charset parameter,
811 charset
= self
.get_param('charset', missing
)
812 if charset
is missing
:
814 if isinstance(charset
, TupleType
):
815 # RFC 2231 encoded, so decode it, and it better end up as ascii.
816 pcharset
= charset
[0] or 'us-ascii'
817 charset
= unicode(charset
[2], pcharset
).encode('us-ascii')
818 # RFC 2046, $4.1.2 says charsets are not case sensitive
819 return charset
.lower()
def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The returned list of items describes the Content-Type headers'
    charset parameter for every part produced by self.walk().

    Each item will either be a string (the value of the charset parameter
    in the Content-Type header of that part) or the value of the
    'failobj' parameter (defaults to None), if the part does not have a
    main MIME type of "text", or the charset is not defined.

    The list will contain one string for each part of the message, plus
    one for the container message (i.e. self), so that a non-multipart
    message will still return a list of length 1.
    """
    # One entry per walked part, in walk() order.
    return [part.get_content_charset(failobj) for part in self.walk()]