append(): Fixing the test for convertability after consultation with
[python/dscho.git] / Doc / lib / emailmessage.tex
blob34c152db9ac8b79f228d2cd30324d788648b818d
1 \declaremodule{standard}{email.Message}
2 \modulesynopsis{The base class representing email messages.}
4 The central class in the \module{email} package is the
5 \class{Message} class; it is the base class for the \module{email}
6 object model. \class{Message} provides the core functionality for
7 setting and querying header fields, and for accessing message bodies.
9 Conceptually, a \class{Message} object consists of \emph{headers} and
10 \emph{payloads}. Headers are \rfc{2822} style field names and
11 values where the field name and value are separated by a colon. The
12 colon is not part of either the field name or the field value.
14 Headers are stored and returned in case-preserving form but are
15 matched case-insensitively. There may also be a single envelope
16 header, also known as the \emph{Unix-From} header or the
17 \code{From_} header. The payload is either a string in the case of
18 simple message objects or a list of \class{Message} objects for
19 MIME container documents (e.g. \mimetype{multipart/*} and
20 \mimetype{message/rfc822}).
22 \class{Message} objects provide a mapping style interface for
23 accessing the message headers, and an explicit interface for accessing
24 both the headers and the payload. They provide convenience methods for
25 generating a flat text representation of the message object tree, for
26 accessing commonly used header parameters, and for recursively walking
27 over the object tree.
29 Here are the methods of the \class{Message} class:
31 \begin{classdesc}{Message}{}
32 The constructor takes no arguments.
33 \end{classdesc}
35 \begin{methoddesc}[Message]{as_string}{\optional{unixfrom}}
36 Return the entire message flattened as a string. When optional
37 \var{unixfrom} is \code{True}, the envelope header is included in the
38 returned string. \var{unixfrom} defaults to \code{False}.
39 \end{methoddesc}
41 \begin{methoddesc}[Message]{__str__}{}
42 Equivalent to \method{as_string(unixfrom=True)}.
43 \end{methoddesc}
45 \begin{methoddesc}[Message]{is_multipart}{}
46 Return \code{True} if the message's payload is a list of
47 sub-\class{Message} objects, otherwise return \code{False}. When
48 \method{is_multipart()} returns \code{False}, the payload should be a string
49 object.
50 \end{methoddesc}
52 \begin{methoddesc}[Message]{set_unixfrom}{unixfrom}
53 Set the message's envelope header to \var{unixfrom}, which should be a string.
54 \end{methoddesc}
56 \begin{methoddesc}[Message]{get_unixfrom}{}
57 Return the message's envelope header. Defaults to \code{None} if the
58 envelope header was never set.
59 \end{methoddesc}
61 \begin{methoddesc}[Message]{attach}{payload}
62 Add the given \var{payload} to the current payload, which must be
63 \code{None} or a list of \class{Message} objects before the call.
64 After the call, the payload will always be a list of \class{Message}
65 objects. If you want to set the payload to a scalar object (e.g. a
66 string), use \method{set_payload()} instead.
67 \end{methoddesc}
69 \begin{methoddesc}[Message]{get_payload}{\optional{i\optional{, decode}}}
70 Return a reference to the current payload, which will be a list of
71 \class{Message} objects when \method{is_multipart()} is \code{True}, or a
72 string when \method{is_multipart()} is \code{False}. If the
73 payload is a list and you mutate the list object, you modify the
74 message's payload in place.
76 With optional argument \var{i}, \method{get_payload()} will return the
77 \var{i}-th element of the payload, counting from zero, if
78 \method{is_multipart()} is \code{True}. An \exception{IndexError}
79 will be raised if \var{i} is less than 0 or greater than or equal to
80 the number of items in the payload. If the payload is a string
81 (i.e. \method{is_multipart()} is \code{False}) and \var{i} is given, a
82 \exception{TypeError} is raised.
84 Optional \var{decode} is a flag indicating whether the payload should be
85 decoded or not, according to the \mailheader{Content-Transfer-Encoding} header.
86 When \code{True} and the message is not a multipart, the payload will be
87 decoded if this header's value is \samp{quoted-printable} or
88 \samp{base64}. If some other encoding is used, or the
89 \mailheader{Content-Transfer-Encoding} header is
90 missing, the payload is returned as-is (undecoded). If the message is
91 a multipart and the \var{decode} flag is \code{True}, then \code{None} is
92 returned. The default for \var{decode} is \code{False}.
93 \end{methoddesc}
95 \begin{methoddesc}[Message]{set_payload}{payload\optional{, charset}}
96 Set the entire message object's payload to \var{payload}. It is the
97 client's responsibility to ensure the payload invariants. Optional
98 \var{charset} sets the message's default character set; see
99 \method{set_charset()} for details.
101 \versionchanged[\var{charset} argument added]{2.2.2}
102 \end{methoddesc}
104 \begin{methoddesc}[Message]{set_charset}{charset}
105 Set the character set of the payload to \var{charset}, which can
106 either be a \class{Charset} instance (see \refmodule{email.Charset}), a
107 string naming a character set,
108 or \code{None}. If it is a string, it will be converted to a
109 \class{Charset} instance. If \var{charset} is \code{None}, the
110 \code{charset} parameter will be removed from the
111 \mailheader{Content-Type} header. Anything else will generate a
112 \exception{TypeError}.
114 The message will be assumed to be of type \mimetype{text/*} encoded with
115 \code{charset.input_charset}. It will be converted to
116 \code{charset.output_charset}
117 and encoded properly, if needed, when generating the plain text
118 representation of the message. MIME headers
119 (\mailheader{MIME-Version}, \mailheader{Content-Type},
120 \mailheader{Content-Transfer-Encoding}) will be added as needed.
122 \versionadded{2.2.2}
123 \end{methoddesc}
125 \begin{methoddesc}[Message]{get_charset}{}
126 Return the \class{Charset} instance associated with the message's payload.
127 \versionadded{2.2.2}
128 \end{methoddesc}
130 The following methods implement a mapping-like interface for accessing
131 the message's \rfc{2822} headers. Note that there are some
132 semantic differences between these methods and a normal mapping
133 (i.e. dictionary) interface. For example, in a dictionary there are
134 no duplicate keys, but here there may be duplicate message headers. Also,
135 in dictionaries there is no guaranteed order to the keys returned by
136 \method{keys()}, but in a \class{Message} object, headers are always
137 returned in the order they appeared in the original message, or were
138 added to the message later. Any header deleted and then re-added is
139 always appended to the end of the header list.
141 These semantic differences are intentional and are biased toward
142 maximal convenience.
144 Note that in all cases, any envelope header present in the message is
145 not included in the mapping interface.
147 \begin{methoddesc}[Message]{__len__}{}
148 Return the total number of headers, including duplicates.
149 \end{methoddesc}
151 \begin{methoddesc}[Message]{__contains__}{name}
152 Return true if the message object has a field named \var{name}.
153 Matching is done case-insensitively and \var{name} should not include the
154 trailing colon. Used for the \code{in} operator,
155 e.g.:
157 \begin{verbatim}
158 if 'message-id' in myMessage:
159     print 'Message-ID:', myMessage['message-id']
160 \end{verbatim}
161 \end{methoddesc}
163 \begin{methoddesc}[Message]{__getitem__}{name}
164 Return the value of the named header field. \var{name} should not
165 include the colon field separator. If the header is missing,
166 \code{None} is returned; a \exception{KeyError} is never raised.
168 Note that if the named field appears more than once in the message's
169 headers, exactly which of those field values will be returned is
170 undefined. Use the \method{get_all()} method to get the values of all
171 the extant named headers.
172 \end{methoddesc}
174 \begin{methoddesc}[Message]{__setitem__}{name, val}
175 Add a header to the message with field name \var{name} and value
176 \var{val}. The field is appended to the end of the message's existing
177 fields.
179 Note that this does \emph{not} overwrite or delete any existing header
180 with the same name. If you want to ensure that the new header is the
181 only one present in the message with field name
182 \var{name}, delete the field first, e.g.:
184 \begin{verbatim}
185 del msg['subject']
186 msg['subject'] = 'Python roolz!'
187 \end{verbatim}
188 \end{methoddesc}
190 \begin{methoddesc}[Message]{__delitem__}{name}
191 Delete all occurrences of the field with name \var{name} from the
192 message's headers. No exception is raised if the named field isn't
193 present in the headers.
194 \end{methoddesc}
196 \begin{methoddesc}[Message]{has_key}{name}
197 Return true if the message contains a header field named \var{name},
198 otherwise return false.
199 \end{methoddesc}
201 \begin{methoddesc}[Message]{keys}{}
202 Return a list of all the message's header field names.
203 \end{methoddesc}
205 \begin{methoddesc}[Message]{values}{}
206 Return a list of all the message's field values.
207 \end{methoddesc}
209 \begin{methoddesc}[Message]{items}{}
210 Return a list of 2-tuples containing all the message's field names
211 and values.
212 \end{methoddesc}
214 \begin{methoddesc}[Message]{get}{name\optional{, failobj}}
215 Return the value of the named header field. This is identical to
216 \method{__getitem__()} except that optional \var{failobj} is returned
217 if the named header is missing (defaults to \code{None}).
218 \end{methoddesc}
220 Here are some additional useful methods:
222 \begin{methoddesc}[Message]{get_all}{name\optional{, failobj}}
223 Return a list of all the values for the field named \var{name}.
224 If there are no such named headers in the message, \var{failobj} is
225 returned (defaults to \code{None}).
226 \end{methoddesc}
228 \begin{methoddesc}[Message]{add_header}{_name, _value, **_params}
229 Extended header setting. This method is similar to
230 \method{__setitem__()} except that additional header parameters can be
231 provided as keyword arguments. \var{_name} is the header field to add
232 and \var{_value} is the \emph{primary} value for the header.
234 For each item in the keyword argument dictionary \var{_params}, the
235 key is taken as the parameter name, with underscores converted to
236 dashes (since dashes are illegal in Python identifiers). Normally,
237 the parameter will be added as \code{key="value"} unless the value is
238 \code{None}, in which case only the key will be added.
240 Here's an example:
242 \begin{verbatim}
243 msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
244 \end{verbatim}
246 This will add a header that looks like
248 \begin{verbatim}
249 Content-Disposition: attachment; filename="bud.gif"
250 \end{verbatim}
251 \end{methoddesc}
253 \begin{methoddesc}[Message]{replace_header}{_name, _value}
254 Replace a header. Replace the first header found in the message that
255 matches \var{_name}, retaining header order and field name case. If
256 no matching header was found, a \exception{KeyError} is raised.
258 \versionadded{2.2.2}
259 \end{methoddesc}
261 \begin{methoddesc}[Message]{get_content_type}{}
262 Return the message's content type. The returned string is coerced to
263 lower case and has the form \mimetype{maintype/subtype}. If there was no
264 \mailheader{Content-Type} header in the message, the default type as
265 given by \method{get_default_type()} will be returned. Since
266 according to \rfc{2045}, messages always have a default type,
267 \method{get_content_type()} will always return a value.
269 \rfc{2045} defines a message's default type to be
270 \mimetype{text/plain} unless it appears inside a
271 \mimetype{multipart/digest} container, in which case it would be
272 \mimetype{message/rfc822}. If the \mailheader{Content-Type} header
273 has an invalid type specification, \rfc{2045} mandates that the
274 default type be \mimetype{text/plain}.
276 \versionadded{2.2.2}
277 \end{methoddesc}
279 \begin{methoddesc}[Message]{get_content_maintype}{}
280 Return the message's main content type. This is the
281 \mimetype{maintype} part of the string returned by
282 \method{get_content_type()}.
284 \versionadded{2.2.2}
285 \end{methoddesc}
287 \begin{methoddesc}[Message]{get_content_subtype}{}
288 Return the message's sub-content type. This is the \mimetype{subtype}
289 part of the string returned by \method{get_content_type()}.
291 \versionadded{2.2.2}
292 \end{methoddesc}
294 \begin{methoddesc}[Message]{get_default_type}{}
295 Return the default content type. Most messages have a default content
296 type of \mimetype{text/plain}, except for messages that are subparts
297 of \mimetype{multipart/digest} containers. Such subparts have a
298 default content type of \mimetype{message/rfc822}.
300 \versionadded{2.2.2}
301 \end{methoddesc}
303 \begin{methoddesc}[Message]{set_default_type}{ctype}
304 Set the default content type. \var{ctype} should either be
305 \mimetype{text/plain} or \mimetype{message/rfc822}, although this is
306 not enforced. The default content type is not stored in the
307 \mailheader{Content-Type} header.
309 \versionadded{2.2.2}
310 \end{methoddesc}
312 \begin{methoddesc}[Message]{get_params}{\optional{failobj\optional{,
313 header\optional{, unquote}}}}
314 Return the message's \mailheader{Content-Type} parameters, as a list. The
315 elements of the returned list are 2-tuples of key/value pairs, as
316 split on the \character{=} sign. The left hand side of the
317 \character{=} is the key, while the right hand side is the value. If
318 there is no \character{=} sign in the parameter the value is the empty
319 string, otherwise the value is as described in \method{get_param()} and is
320 unquoted if optional \var{unquote} is \code{True} (the default).
322 Optional \var{failobj} is the object to return if there is no
323 \mailheader{Content-Type} header. Optional \var{header} is the header to
324 search instead of \mailheader{Content-Type}.
326 \versionchanged[\var{unquote} argument added]{2.2.2}
327 \end{methoddesc}
329 \begin{methoddesc}[Message]{get_param}{param\optional{,
330 failobj\optional{, header\optional{, unquote}}}}
331 Return the value of the \mailheader{Content-Type} header's parameter
332 \var{param} as a string. If the message has no \mailheader{Content-Type}
333 header or if there is no such parameter, then \var{failobj} is
334 returned (defaults to \code{None}).
336 Optional \var{header} if given, specifies the message header to use
337 instead of \mailheader{Content-Type}.
339 Parameter keys are always compared case insensitively. The return
340 value can either be a string, or a 3-tuple if the parameter was
341 \rfc{2231} encoded. When it's a 3-tuple, the elements of the value are of
342 the form \code{(CHARSET, LANGUAGE, VALUE)}, where \code{LANGUAGE} may
343 be the empty string. Your application should be prepared to deal with
344 3-tuple return values, which it can convert to a Unicode string like so:
347 \begin{verbatim}
348 param = msg.get_param('foo')
349 if isinstance(param, tuple):
350     param = unicode(param[2], param[0])
351 \end{verbatim}
353 In any case, the parameter value (either the returned string, or the
354 \code{VALUE} item in the 3-tuple) is always unquoted, unless
355 \var{unquote} is set to \code{False}.
357 \versionchanged[\var{unquote} argument added, and 3-tuple return value
358 possible]{2.2.2}
359 \end{methoddesc}
361 \begin{methoddesc}[Message]{set_param}{param, value\optional{,
362 header\optional{, requote\optional{, charset\optional{, language}}}}}
364 Set a parameter in the \mailheader{Content-Type} header. If the
365 parameter already exists in the header, its value will be replaced
366 with \var{value}. If the \mailheader{Content-Type} header has not yet
367 been defined for this message, it will be set to \mimetype{text/plain}
368 and the new parameter value will be appended as per \rfc{2045}.
370 Optional \var{header} specifies an alternative header to
371 \mailheader{Content-Type}, and all parameters will be quoted as
372 necessary unless optional \var{requote} is \code{False} (the default
373 is \code{True}).
375 If optional \var{charset} is specified, the parameter will be encoded
376 according to \rfc{2231}. Optional \var{language} specifies the
377 \rfc{2231} language, defaulting to the empty string. Both \var{charset} and
378 \var{language} should be strings.
380 \versionadded{2.2.2}
381 \end{methoddesc}
383 \begin{methoddesc}[Message]{del_param}{param\optional{, header\optional{,
384 requote}}}
385 Remove the given parameter completely from the
386 \mailheader{Content-Type} header. The header will be re-written in
387 place without the parameter or its value. All values will be quoted
388 as necessary unless \var{requote} is \code{False} (the default is
389 \code{True}). Optional \var{header} specifies an alternative to
390 \mailheader{Content-Type}.
392 \versionadded{2.2.2}
393 \end{methoddesc}
395 \begin{methoddesc}[Message]{set_type}{type\optional{, header\optional{,
396 requote}}}
397 Set the main type and subtype for the \mailheader{Content-Type}
398 header. \var{type} must be a string in the form
399 \mimetype{maintype/subtype}, otherwise a \exception{ValueError} is
400 raised.
402 This method replaces the \mailheader{Content-Type} header, keeping all
403 the parameters in place. If \var{requote} is \code{False}, this
404 leaves the existing header's quoting as is, otherwise the parameters
405 will be quoted (the default).
407 An alternative header can be specified in the \var{header} argument.
408 When the \mailheader{Content-Type} header is set a
409 \mailheader{MIME-Version} header is also added.
411 \versionadded{2.2.2}
412 \end{methoddesc}
414 \begin{methoddesc}[Message]{get_filename}{\optional{failobj}}
415 Return the value of the \code{filename} parameter of the
416 \mailheader{Content-Disposition} header of the message, or \var{failobj} if
417 either the header is missing, or has no \code{filename} parameter.
418 The returned string will always be unquoted as per
419 \method{Utils.unquote()}.
420 \end{methoddesc}
422 \begin{methoddesc}[Message]{get_boundary}{\optional{failobj}}
423 Return the value of the \code{boundary} parameter of the
424 \mailheader{Content-Type} header of the message, or \var{failobj} if either
425 the header is missing, or has no \code{boundary} parameter. The
426 returned string will always be unquoted as per
427 \method{Utils.unquote()}.
428 \end{methoddesc}
430 \begin{methoddesc}[Message]{set_boundary}{boundary}
431 Set the \code{boundary} parameter of the \mailheader{Content-Type}
432 header to \var{boundary}. \method{set_boundary()} will always quote
433 \var{boundary} if necessary. A \exception{HeaderParseError} is raised
434 if the message object has no \mailheader{Content-Type} header.
436 Note that using this method is subtly different than deleting the old
437 \mailheader{Content-Type} header and adding a new one with the new boundary
438 via \method{add_header()}, because \method{set_boundary()} preserves the
439 order of the \mailheader{Content-Type} header in the list of headers.
440 However, it does \emph{not} preserve any continuation lines which may
441 have been present in the original \mailheader{Content-Type} header.
442 \end{methoddesc}
444 \begin{methoddesc}[Message]{get_content_charset}{\optional{failobj}}
445 Return the \code{charset} parameter of the \mailheader{Content-Type}
446 header, coerced to lower case. If there is no
447 \mailheader{Content-Type} header, or if that header has no
448 \code{charset} parameter, \var{failobj} is returned.
450 Note that this method differs from \method{get_charset()} which
451 returns the \class{Charset} instance for the default encoding of the
452 message body.
454 \versionadded{2.2.2}
455 \end{methoddesc}
457 \begin{methoddesc}[Message]{get_charsets}{\optional{failobj}}
458 Return a list containing the character set names in the message. If
459 the message is a \mimetype{multipart}, then the list will contain one
460 element for each subpart in the payload, otherwise, it will be a list
461 of length 1.
463 Each item in the list will be a string which is the value of the
464 \code{charset} parameter in the \mailheader{Content-Type} header for the
465 represented subpart. However, if the subpart has no
466 \mailheader{Content-Type} header, no \code{charset} parameter, or is not of
467 the \mimetype{text} main MIME type, then that item in the returned list
468 will be \var{failobj}.
469 \end{methoddesc}
471 \begin{methoddesc}[Message]{walk}{}
472 The \method{walk()} method is an all-purpose generator which can be
473 used to iterate over all the parts and subparts of a message object
474 tree, in depth-first traversal order. You will typically use
475 \method{walk()} as the iterator in a \code{for} loop; each
476 iteration returns the next subpart.
478 Here's an example that prints the MIME type of every part of a
479 multipart message structure:
481 \begin{verbatim}
482 >>> for part in msg.walk():
483 ...     print part.get_content_type()
484 multipart/report
485 text/plain
486 message/delivery-status
487 text/plain
488 text/plain
489 message/rfc822
490 \end{verbatim}
491 \end{methoddesc}
493 \class{Message} objects can also optionally contain two instance
494 attributes, which can be used when generating the plain text of a MIME
495 message.
497 \begin{datadesc}{preamble}
498 The format of a MIME document allows for some text between the blank
499 line following the headers, and the first multipart boundary string.
500 Normally, this text is never visible in a MIME-aware mail reader
501 because it falls outside the standard MIME armor. However, when
502 viewing the raw text of the message, or when viewing the message in a
503 non-MIME aware reader, this text can become visible.
505 The \var{preamble} attribute contains this leading extra-armor text
506 for MIME documents. When the \class{Parser} discovers some text after
507 the headers but before the first boundary string, it assigns this text
508 to the message's \var{preamble} attribute. When the \class{Generator}
509 is writing out the plain text representation of a MIME message, and it
510 finds the message has a \var{preamble} attribute, it will write this
511 text in the area between the headers and the first boundary. See
512 \refmodule{email.Parser} and \refmodule{email.Generator} for details.
514 Note that if the message object has no preamble, the
515 \var{preamble} attribute will be \code{None}.
516 \end{datadesc}
518 \begin{datadesc}{epilogue}
519 The \var{epilogue} attribute acts the same way as the \var{preamble}
520 attribute, except that it contains text that appears between the last
521 boundary and the end of the message.
523 One note: when generating the flat text for a \mimetype{multipart}
524 message that has no \var{epilogue} (using the standard
525 \class{Generator} class), no newline is added after the closing
526 boundary line. If the message object has an \var{epilogue} and its
527 value does not start with a newline, a newline is printed after the
528 closing boundary. This seems a little clumsy, but it makes the most
529 practical sense. The upshot is that if you want to ensure that a
530 newline gets printed after your closing \mimetype{multipart} boundary,
531 set the \var{epilogue} to the empty string.
532 \end{datadesc}
534 \subsubsection{Deprecated methods}
536 The following methods are deprecated in \module{email} version 2.
537 They are documented here for completeness.
539 \begin{methoddesc}[Message]{add_payload}{payload}
540 Add \var{payload} to the message object's existing payload. If, prior
541 to calling this method, the object's payload was \code{None}
542 (i.e. never before set), then after this method is called, the payload
543 will be the argument \var{payload}.
545 If the object's payload was already a list
546 (i.e. \method{is_multipart()} returns \code{True}), then \var{payload} is
547 appended to the end of the existing payload list.
549 For any other type of existing payload, \method{add_payload()} will
550 transform the new payload into a list consisting of the old payload
551 and \var{payload}, but only if the document is already a MIME
552 multipart document. This condition is satisfied if either the message's
553 \mailheader{Content-Type} header's main type is
554 \mimetype{multipart}, or there is no \mailheader{Content-Type}
555 header. In any other situation,
556 \exception{MultipartConversionError} is raised.
558 \deprecated{2.2.2}{Use the \method{attach()} method instead.}
559 \end{methoddesc}
561 \begin{methoddesc}[Message]{get_type}{\optional{failobj}}
562 Return the message's content type, as a string of the form
563 \mimetype{maintype/subtype} as taken from the
564 \mailheader{Content-Type} header.
565 The returned string is coerced to lowercase.
567 If there is no \mailheader{Content-Type} header in the message,
568 \var{failobj} is returned (defaults to \code{None}).
570 \deprecated{2.2.2}{Use the \method{get_content_type()} method instead.}
571 \end{methoddesc}
573 \begin{methoddesc}[Message]{get_main_type}{\optional{failobj}}
574 Return the message's \emph{main} content type. This essentially returns the
575 \var{maintype} part of the string returned by \method{get_type()}, with the
576 same semantics for \var{failobj}.
578 \deprecated{2.2.2}{Use the \method{get_content_maintype()} method instead.}
579 \end{methoddesc}
581 \begin{methoddesc}[Message]{get_subtype}{\optional{failobj}}
582 Return the message's sub-content type. This essentially returns the
583 \var{subtype} part of the string returned by \method{get_type()}, with the
584 same semantics for \var{failobj}.
586 \deprecated{2.2.2}{Use the \method{get_content_subtype()} method instead.}
587 \end{methoddesc}