Lib/email/Message.py

   1 # Copyright (C) 2001,2002 Python Software Foundation
   2 # Author: barry@zope.com (Barry Warsaw)
   3
   4 """Basic message object for the email package object model.
   5 """
   6
   7 import re
   8 import uu
   9 import binascii
  10 import warnings
  11 from cStringIO import StringIO
  12 from types import ListType, TupleType, StringType
  13
  14 # Intrapackage imports
  15 from email import Utils
  16 from email import Errors
  17 from email import Charset
  18
# Separator placed between a header value and each of its parameters.
SEMISPACE = '; '

# Interpreters that predate the True/False builtins raise NameError on the
# bare reference below; give them integer stand-ins.
try:
    True, False
except NameError:
    True = 1
    False = 0

# Regular expression used to split header parameters.  BAW: this may be too
# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild.  We may eventually need a full-fledged
# parser.
paramre = re.compile(r'\s*;\s*')
# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
  35
  36
  37 \f
  38 # Helper functions
  39 def _formatparam(param, value=None, quote=True):
  40     """Convenience function to format and return a key=value pair.
  41
  42     This will quote the value if needed or if quote is true.
  43     """
  44     if value is not None and len(value) > 0:
  45         # TupleType is used for RFC 2231 encoded parameter values where items
  46         # are (charset, language, value).  charset is a string, not a Charset
  47         # instance.
  48         if isinstance(value, TupleType):
  49             # Encode as per RFC 2231
  50             param += '*'
  51             value = Utils.encode_rfc2231(value[2], value[0], value[1])
  52         # BAW: Please check this.  I think that if quote is set it should
  53         # force quoting even if not necessary.
  54         if quote or tspecials.search(value):
  55             return '%s="%s"' % (param, Utils.quote(value))
  56         else:
  57             return '%s=%s' % (param, value)
  58     else:
  59         return param
  60
  61 def _parseparam(s):
  62     plist = []
  63     while s[:1] == ';':
  64         s = s[1:]
  65         end = s.find(';')
  66         while end > 0 and s.count('"', 0, end) % 2:
  67             end = s.find(';', end + 1)
  68         if end < 0:
  69             end = len(s)
  70         f = s[:end]
  71         if '=' in f:
  72             i = f.index('=')
  73             f = f[:i].strip().lower() + '=' + f[i+1:].strip()
  74         plist.append(f.strip())
  75         s = s[end:]
  76     return plist
  77
  78
  79 def _unquotevalue(value):
  80     if isinstance(value, TupleType):
  81         return value[0], value[1], Utils.unquote(value[2])
  82     else:
  83         return Utils.unquote(value)
  84
  85
  86 \f
  87 class Message:
  88     """Basic message object.
  89
  90     A message object is defined as something that has a bunch of RFC 2822
  91     headers and a payload.  It may optionally have an envelope header
  92     (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
  93     multipart or a message/rfc822), then the payload is a list of Message
  94     objects, otherwise it is a string.
  95
  96     Message objects implement part of the `mapping' interface, which assumes
  97     there is exactly one occurrance of the header per message.  Some headers
  98     do in fact appear multiple times (e.g. Received) and for those headers,
  99     you must use the explicit API to set or get all the headers.  Not all of
 100     the mapping methods are implemented.
 101     """
 102     def __init__(self):
 103         self._headers = []
 104         self._unixfrom = None
 105         self._payload = None
 106         self._charset = None
 107         # Defaults for multipart messages
 108         self.preamble = self.epilogue = None
 109         # Default content type
 110         self._default_type = 'text/plain'
 111
 112     def __str__(self):
 113         """Return the entire formatted message as a string.
 114         This includes the headers, body, and envelope header.
 115         """
 116         return self.as_string(unixfrom=True)
 117
 118     def as_string(self, unixfrom=False):
 119         """Return the entire formatted message as a string.
 120         Optional `unixfrom' when True, means include the Unix From_ envelope
 121         header.
 122
 123         This is a convenience method and may not generate the message exactly
 124         as you intend.  For more flexibility, use the flatten() method of a
 125         Generator instance.
 126         """
 127         from email.Generator import Generator
 128         fp = StringIO()
 129         g = Generator(fp)
 130         g.flatten(self, unixfrom=unixfrom)
 131         return fp.getvalue()
 132
 133     def is_multipart(self):
 134         """Return True if the message consists of multiple parts."""
 135         if isinstance(self._payload, ListType):
 136             return True
 137         return False
 138
 139     #
 140     # Unix From_ line
 141     #
    def set_unixfrom(self, unixfrom):
        """Store unixfrom as the message's envelope (Unix From_) header."""
        self._unixfrom = unixfrom
 144
    def get_unixfrom(self):
        """Return the stored envelope (Unix From_) header.

        The value is None until one has been set with set_unixfrom().
        """
        return self._unixfrom
 147
 148     #
 149     # Payload manipulation.
 150     #
 151     def add_payload(self, payload):
 152         """Add the given payload to the current payload.
 153
 154         If the current payload is empty, then the current payload will be made
 155         a scalar, set to the given value.
 156
 157         Note: This method is deprecated.  Use .attach() instead.
 158         """
 159         warnings.warn('add_payload() is deprecated, use attach() instead.',
 160                       DeprecationWarning, 2)
 161         if self._payload is None:
 162             self._payload = payload
 163         elif isinstance(self._payload, ListType):
 164             self._payload.append(payload)
 165         elif self.get_main_type() not in (None, 'multipart'):
 166             raise Errors.MultipartConversionError(
 167                 'Message main content type must be "multipart" or missing')
 168         else:
 169             self._payload = [self._payload, payload]
 170
 171     def attach(self, payload):
 172         """Add the given payload to the current payload.
 173
 174         The current payload will always be a list of objects after this method
 175         is called.  If you want to set the payload to a scalar object, use
 176         set_payload() instead.
 177         """
 178         if self._payload is None:
 179             self._payload = [payload]
 180         else:
 181             self._payload.append(payload)
 182
 183     def get_payload(self, i=None, decode=False):
 184         """Return a reference to the payload.
 185
 186         The payload will either be a list object or a string.  If you mutate
 187         the list object, you modify the message's payload in place.  Optional
 188         i returns that index into the payload.
 189
 190         Optional decode is a flag indicating whether the payload should be
 191         decoded or not, according to the Content-Transfer-Encoding header
 192         (default is False).
 193
 194         When True and the message is not a multipart, the payload will be
 195         decoded if this header's value is `quoted-printable' or `base64'.  If
 196         some other encoding is used, or the header is missing, or if the
 197         payload has bogus data (i.e. bogus base64 or uuencoded data), the
 198         payload is returned as-is.
 199
 200         If the message is a multipart and the decode flag is True, then None
 201         is returned.
 202         """
 203         if i is None:
 204             payload = self._payload
 205         elif not isinstance(self._payload, ListType):
 206             raise TypeError, 'Expected list, got %s' % type(self._payload)
 207         else:
 208             payload = self._payload[i]
 209         if decode:
 210             if self.is_multipart():
 211                 return None
 212             cte = self.get('content-transfer-encoding', '').lower()
 213             if cte == 'quoted-printable':
 214                 return Utils._qdecode(payload)
 215             elif cte == 'base64':
 216                 try:
 217                     return Utils._bdecode(payload)
 218                 except binascii.Error:
 219                     # Incorrect padding
 220                     return payload
 221             elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
 222                 sfp = StringIO()
 223                 try:
 224                     uu.decode(StringIO(payload+'\n'), sfp)
 225                     payload = sfp.getvalue()
 226                 except uu.Error:
 227                     # Some decoding problem
 228                     return payload
 229         # Everything else, including encodings with 8bit or 7bit are returned
 230         # unchanged.
 231         return payload
 232
 233     def set_payload(self, payload, charset=None):
 234         """Set the payload to the given value.
 235
 236         Optional charset sets the message's default character set.  See
 237         set_charset() for details.
 238         """
 239         self._payload = payload
 240         if charset is not None:
 241             self.set_charset(charset)
 242
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field and the method returns without adding any
        headers.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed;
        headers that are already present are not added again.
        """
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        if isinstance(charset, StringType):
            charset = Charset.Charset(charset)
        if not isinstance(charset, Charset.Charset):
            raise TypeError, charset
        # BAW: should we accept strings that can serve as arguments to the
        # Charset constructor?  NOTE(review): strings are already converted
        # just above, so this question looks stale -- confirm and remove.
        self._charset = charset
        if not self.has_key('MIME-Version'):
            self.add_header('MIME-Version', '1.0')
        if not self.has_key('Content-Type'):
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            # Keep the existing type and only set its charset parameter.
            self.set_param('charset', charset.get_output_charset())
        if not self.has_key('Content-Transfer-Encoding'):
            cte = charset.get_body_encoding()
            # get_body_encoding() may hand back a callable, which is invoked
            # with this message instead of being used as a header value
            # (NOTE(review): presumably it encodes the payload and adds the
            # header itself -- confirm against Charset).
            if callable(cte):
                cte(self)
            else:
                self.add_header('Content-Transfer-Encoding', cte)
 282
    def get_charset(self):
        """Return the Charset instance associated with the message's payload.

        This is the value last stored by set_charset(), or None when no
        charset has been set.
        """
        return self._charset
 287
 288     #
 289     # MAPPING INTERFACE (partial)
 290     #
 291     def __len__(self):
 292         """Return the total number of headers, including duplicates."""
 293         return len(self._headers)
 294
 295     def __getitem__(self, name):
 296         """Get a header value.
 297
 298         Return None if the header is missing instead of raising an exception.
 299
 300         Note that if the header appeared multiple times, exactly which
 301         occurrance gets returned is undefined.  Use getall() to get all
 302         the values matching a header field name.
 303         """
 304         return self.get(name)
 305
 306     def __setitem__(self, name, val):
 307         """Set the value of a header.
 308
 309         Note: this does not overwrite an existing header with the same field
 310         name.  Use __delitem__() first to delete any existing headers.
 311         """
 312         self._headers.append((name, val))
 313
 314     def __delitem__(self, name):
 315         """Delete all occurrences of a header, if present.
 316
 317         Does not raise an exception if the header is missing.
 318         """
 319         name = name.lower()
 320         newheaders = []
 321         for k, v in self._headers:
 322             if k.lower() <> name:
 323                 newheaders.append((k, v))
 324         self._headers = newheaders
 325
 326     def __contains__(self, name):
 327         return name.lower() in [k.lower() for k, v in self._headers]
 328
 329     def has_key(self, name):
 330         """Return true if the message contains the header."""
 331         missing = []
 332         return self.get(name, missing) is not missing
 333
 334     def keys(self):
 335         """Return a list of all the message's header field names.
 336
 337         These will be sorted in the order they appeared in the original
 338         message, or were added to the message, and may contain duplicates.
 339         Any fields deleted and re-inserted are always appended to the header
 340         list.
 341         """
 342         return [k for k, v in self._headers]
 343
 344     def values(self):
 345         """Return a list of all the message's header values.
 346
 347         These will be sorted in the order they appeared in the original
 348         message, or were added to the message, and may contain duplicates.
 349         Any fields deleted and re-inserted are always appended to the header
 350         list.
 351         """
 352         return [v for k, v in self._headers]
 353
 354     def items(self):
 355         """Get all the message's header fields and values.
 356
 357         These will be sorted in the order they appeared in the original
 358         message, or were added to the message, and may contain duplicates.
 359         Any fields deleted and re-inserted are always appended to the header
 360         list.
 361         """
 362         return self._headers[:]
 363
 364     def get(self, name, failobj=None):
 365         """Get a header value.
 366
 367         Like __getitem__() but return failobj instead of None when the field
 368         is missing.
 369         """
 370         name = name.lower()
 371         for k, v in self._headers:
 372             if k.lower() == name:
 373                 return v
 374         return failobj
 375
 376     #
 377     # Additional useful stuff
 378     #
 379
 380     def get_all(self, name, failobj=None):
 381         """Return a list of all the values for the named field.
 382
 383         These will be sorted in the order they appeared in the original
 384         message, and may contain duplicates.  Any fields deleted and
 385         re-inserted are always appended to the header list.
 386
 387         If no such fields exist, failobj is returned (defaults to None).
 388         """
 389         values = []
 390         name = name.lower()
 391         for k, v in self._headers:
 392             if k.lower() == name:
 393                 values.append(v)
 394         if not values:
 395             return failobj
 396         return values
 397
 398     def add_header(self, _name, _value, **_params):
 399         """Extended header setting.
 400
 401         name is the header field to add.  keyword arguments can be used to set
 402         additional parameters for the header field, with underscores converted
 403         to dashes.  Normally the parameter will be added as key="value" unless
 404         value is None, in which case only the key will be added.
 405
 406         Example:
 407
 408         msg.add_header('content-disposition', 'attachment', filename='bud.gif')
 409         """
 410         parts = []
 411         for k, v in _params.items():
 412             if v is None:
 413                 parts.append(k.replace('_', '-'))
 414             else:
 415                 parts.append(_formatparam(k.replace('_', '-'), v))
 416         if _value is not None:
 417             parts.insert(0, _value)
 418         self._headers.append((_name, SEMISPACE.join(parts)))
 419
 420     def replace_header(self, _name, _value):
 421         """Replace a header.
 422
 423         Replace the first matching header found in the message, retaining
 424         header order and case.  If no matching header was found, a KeyError is
 425         raised.
 426         """
 427         _name = _name.lower()
 428         for i, (k, v) in zip(range(len(self._headers)), self._headers):
 429             if k.lower() == _name:
 430                 self._headers[i] = (k, _value)
 431                 break
 432         else:
 433             raise KeyError, _name
 434
 435     #
 436     # These methods are silently deprecated in favor of get_content_type() and
 437     # friends (see below).  They will be noisily deprecated in email 3.0.
 438     #
 439
 440     def get_type(self, failobj=None):
 441         """Returns the message's content type.
 442
 443         The returned string is coerced to lowercase and returned as a single
 444         string of the form `maintype/subtype'.  If there was no Content-Type
 445         header in the message, failobj is returned (defaults to None).
 446         """
 447         missing = []
 448         value = self.get('content-type', missing)
 449         if value is missing:
 450             return failobj
 451         return paramre.split(value)[0].lower().strip()
 452
 453     def get_main_type(self, failobj=None):
 454         """Return the message's main content type if present."""
 455         missing = []
 456         ctype = self.get_type(missing)
 457         if ctype is missing:
 458             return failobj
 459         if ctype.count('/') <> 1:
 460             return failobj
 461         return ctype.split('/')[0]
 462
 463     def get_subtype(self, failobj=None):
 464         """Return the message's content subtype if present."""
 465         missing = []
 466         ctype = self.get_type(missing)
 467         if ctype is missing:
 468             return failobj
 469         if ctype.count('/') <> 1:
 470             return failobj
 471         return ctype.split('/')[1]
 472
 473     #
 474     # Use these three methods instead of the three above.
 475     #
 476
 477     def get_content_type(self):
 478         """Return the message's content type.
 479
 480         The returned string is coerced to lower case of the form
 481         `maintype/subtype'.  If there was no Content-Type header in the
 482         message, the default type as given by get_default_type() will be
 483         returned.  Since according to RFC 2045, messages always have a default
 484         type this will always return a value.
 485
 486         RFC 2045 defines a message's default type to be text/plain unless it
 487         appears inside a multipart/digest container, in which case it would be
 488         message/rfc822.
 489         """
 490         missing = []
 491         value = self.get('content-type', missing)
 492         if value is missing:
 493             # This should have no parameters
 494             return self.get_default_type()
 495         ctype = paramre.split(value)[0].lower().strip()
 496         # RFC 2045, section 5.2 says if its invalid, use text/plain
 497         if ctype.count('/') <> 1:
 498             return 'text/plain'
 499         return ctype
 500
 501     def get_content_maintype(self):
 502         """Return the message's main content type.
 503
 504         This is the `maintype' part of the string returned by
 505         get_content_type().
 506         """
 507         ctype = self.get_content_type()
 508         return ctype.split('/')[0]
 509
 510     def get_content_subtype(self):
 511         """Returns the message's sub-content type.
 512
 513         This is the `subtype' part of the string returned by
 514         get_content_type().
 515         """
 516         ctype = self.get_content_type()
 517         return ctype.split('/')[1]
 518
    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.

        The value returned is the one stored by set_default_type(), or
        `text/plain' if it was never changed.
        """
        return self._default_type
 527
    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is kept on the message
        object only; it is not stored in the Content-Type header.
        """
        self._default_type = ctype
 536
 537     def _get_params_preserve(self, failobj, header):
 538         # Like get_params() but preserves the quoting of values.  BAW:
 539         # should this be part of the public interface?
 540         missing = []
 541         value = self.get(header, missing)
 542         if value is missing:
 543             return failobj
 544         params = []
 545         for p in _parseparam(';' + value):
 546             try:
 547                 name, val = p.split('=', 1)
 548                 name = name.strip()
 549                 val = val.strip()
 550             except ValueError:
 551                 # Must have been a bare attribute
 552                 name = p.strip()
 553                 val = ''
 554             params.append((name, val))
 555         params = Utils.decode_params(params)
 556         return params
 557
 558     def get_params(self, failobj=None, header='content-type', unquote=True):
 559         """Return the message's Content-Type parameters, as a list.
 560
 561         The elements of the returned list are 2-tuples of key/value pairs, as
 562         split on the `=' sign.  The left hand side of the `=' is the key,
 563         while the right hand side is the value.  If there is no `=' sign in
 564         the parameter the value is the empty string.  The value is as
 565         described in the get_param() method.
 566
 567         Optional failobj is the object to return if there is no Content-Type
 568         header.  Optional header is the header to search instead of
 569         Content-Type.  If unquote is True, the value is unquoted.
 570         """
 571         missing = []
 572         params = self._get_params_preserve(missing, header)
 573         if params is missing:
 574             return failobj
 575         if unquote:
 576             return [(k, _unquotevalue(v)) for k, v in params]
 577         else:
 578             return params
 579
 580     def get_param(self, param, failobj=None, header='content-type',
 581                   unquote=True):
 582         """Return the parameter value if found in the Content-Type header.
 583
 584         Optional failobj is the object to return if there is no Content-Type
 585         header, or the Content-Type header has no such parameter.  Optional
 586         header is the header to search instead of Content-Type.
 587
 588         Parameter keys are always compared case insensitively.  The return
 589         value can either be a string, or a 3-tuple if the parameter was RFC
 590         2231 encoded.  When it's a 3-tuple, the elements of the value are of
 591         the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
 592         LANGUAGE can be None, in which case you should consider VALUE to be
 593         encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
 594
 595         Your application should be prepared to deal with 3-tuple return
 596         values, and can convert the parameter to a Unicode string like so:
 597
 598             param = msg.get_param('foo')
 599             if isinstance(param, tuple):
 600                 param = unicode(param[2], param[0] or 'us-ascii')
 601
 602         In any case, the parameter value (either the returned string, or the
 603         VALUE item in the 3-tuple) is always unquoted, unless unquote is set
 604         to False.
 605         """
 606         if not self.has_key(header):
 607             return failobj
 608         for k, v in self._get_params_preserve(failobj, header):
 609             if k.lower() == param.lower():
 610                 if unquote:
 611                     return _unquotevalue(v)
 612                 else:
 613                     return v
 614         return failobj
 615
 616     def set_param(self, param, value, header='Content-Type', requote=True,
 617                   charset=None, language=''):
 618         """Set a parameter in the Content-Type header.
 619
 620         If the parameter already exists in the header, its value will be
 621         replaced with the new value.
 622
 623         If header is Content-Type and has not yet been defined for this
 624         message, it will be set to "text/plain" and the new parameter and
 625         value will be appended as per RFC 2045.
 626
 627         An alternate header can specified in the header argument, and all
 628         parameters will be quoted as necessary unless requote is False.
 629
 630         If charset is specified, the parameter will be encoded according to RFC
 631         2231.  Optional language specifies the RFC 2231 language, defaulting
 632         to the empty string.  Both charset and language should be strings.
 633         """
 634         if not isinstance(value, TupleType) and charset:
 635             value = (charset, language, value)
 636
 637         if not self.has_key(header) and header.lower() == 'content-type':
 638             ctype = 'text/plain'
 639         else:
 640             ctype = self.get(header)
 641         if not self.get_param(param, header=header):
 642             if not ctype:
 643                 ctype = _formatparam(param, value, requote)
 644             else:
 645                 ctype = SEMISPACE.join(
 646                     [ctype, _formatparam(param, value, requote)])
 647         else:
 648             ctype = ''
 649             for old_param, old_value in self.get_params(header=header,
 650                                                         unquote=requote):
 651                 append_param = ''
 652                 if old_param.lower() == param.lower():
 653                     append_param = _formatparam(param, value, requote)
 654                 else:
 655                     append_param = _formatparam(old_param, old_value, requote)
 656                 if not ctype:
 657                     ctype = append_param
 658                 else:
 659                     ctype = SEMISPACE.join([ctype, append_param])
 660         if ctype <> self.get(header):
 661             del self[header]
 662             self[header] = ctype
 663
 664     def del_param(self, param, header='content-type', requote=True):
 665         """Remove the given parameter completely from the Content-Type header.
 666
 667         The header will be re-written in place without the parameter or its
 668         value. All values will be quoted as necessary unless requote is
 669         False.  Optional header specifies an alternative to the Content-Type
 670         header.
 671         """
 672         if not self.has_key(header):
 673             return
 674         new_ctype = ''
 675         for p, v in self.get_params(header, unquote=requote):
 676             if p.lower() <> param.lower():
 677                 if not new_ctype:
 678                     new_ctype = _formatparam(p, v, requote)
 679                 else:
 680                     new_ctype = SEMISPACE.join([new_ctype,
 681                                                 _formatparam(p, v, requote)])
 682         if new_ctype <> self.get(header):
 683             del self[header]
 684             self[header] = new_ctype
 685
 686     def set_type(self, type, header='Content-Type', requote=True):
 687         """Set the main type and subtype for the Content-Type header.
 688
 689         type must be a string in the form "maintype/subtype", otherwise a
 690         ValueError is raised.
 691
 692         This method replaces the Content-Type header, keeping all the
 693         parameters in place.  If requote is False, this leaves the existing
 694         header's quoting as is.  Otherwise, the parameters will be quoted (the
 695         default).
 696
 697         An alternative header can be specified in the header argument.  When
 698         the Content-Type header is set, we'll always also add a MIME-Version
 699         header.
 700         """
 701         # BAW: should we be strict?
 702         if not type.count('/') == 1:
 703             raise ValueError
 704         # Set the Content-Type, you get a MIME-Version
 705         if header.lower() == 'content-type':
 706             del self['mime-version']
 707             self['MIME-Version'] = '1.0'
 708         if not self.has_key(header):
 709             self[header] = type
 710             return
 711         params = self.get_params(header, unquote=requote)
 712         del self[header]
 713         self[header] = type
 714         # Skip the first param; it's the old type.
 715         for p, v in params[1:]:
 716             self.set_param(p, v, header, requote)
 717
 718     def get_filename(self, failobj=None):
 719         """Return the filename associated with the payload if present.
 720
 721         The filename is extracted from the Content-Disposition header's
 722         `filename' parameter, and it is unquoted.
 723         """
 724         missing = []
 725         filename = self.get_param('filename', missing, 'content-disposition')
 726         if filename is missing:
 727             return failobj
 728         if isinstance(filename, TupleType):
 729             # It's an RFC 2231 encoded parameter
 730             newvalue = _unquotevalue(filename)
 731             return unicode(newvalue[2], newvalue[0] or 'us-ascii')
 732         else:
 733             newvalue = _unquotevalue(filename.strip())
 734             return newvalue
 735
 736     def get_boundary(self, failobj=None):
 737         """Return the boundary associated with the payload if present.
 738
 739         The boundary is extracted from the Content-Type header's `boundary'
 740         parameter, and it is unquoted.
 741         """
 742         missing = []
 743         boundary = self.get_param('boundary', missing)
 744         if boundary is missing:
 745             return failobj
 746         if isinstance(boundary, TupleType):
 747             # RFC 2231 encoded, so decode.  It better end up as ascii
 748             charset = boundary[0] or 'us-ascii'
 749             return unicode(boundary[2], charset).encode('us-ascii')
 750         return _unquotevalue(boundary.strip())
 751
 752     def set_boundary(self, boundary):
 753         """Set the boundary parameter in Content-Type to 'boundary'.
 754
 755         This is subtly different than deleting the Content-Type header and
 756         adding a new one with a new boundary parameter via add_header().  The
 757         main difference is that using the set_boundary() method preserves the
 758         order of the Content-Type header in the original message.
 759
 760         HeaderParseError is raised if the message has no Content-Type header.
 761         """
 762         missing = []
 763         params = self._get_params_preserve(missing, 'content-type')
 764         if params is missing:
 765             # There was no Content-Type header, and we don't know what type
 766             # to set it to, so raise an exception.
 767             raise Errors.HeaderParseError, 'No Content-Type header found'
 768         newparams = []
 769         foundp = False
 770         for pk, pv in params:
 771             if pk.lower() == 'boundary':
 772                 newparams.append(('boundary', '"%s"' % boundary))
 773                 foundp = True
 774             else:
 775                 newparams.append((pk, pv))
 776         if not foundp:
 777             # The original Content-Type header had no boundary attribute.
 778             # Tack one on the end.  BAW: should we raise an exception
 779             # instead???
 780             newparams.append(('boundary', '"%s"' % boundary))
 781         # Replace the existing Content-Type header with the new value
 782         newheaders = []
 783         for h, v in self._headers:
 784             if h.lower() == 'content-type':
 785                 parts = []
 786                 for k, v in newparams:
 787                     if v == '':
 788                         parts.append(k)
 789                     else:
 790                         parts.append('%s=%s' % (k, v))
 791                 newheaders.append((h, SEMISPACE.join(parts)))
 792
 793             else:
 794                 newheaders.append((h, v))
 795         self._headers = newheaders
 796
    # Pull in the walk() implementation suited to the running interpreter.
    # The import sits in the class body, so the imported function is bound
    # as the `walk' attribute of the class; get_charsets() calls it as
    # self.walk().
    try:
        from email._compat22 import walk
    except SyntaxError:
        # Must be using Python 2.1 (NOTE(review): presumably _compat22 uses
        # syntax that 2.1 cannot compile -- confirm against that module).
        from email._compat21 import walk
 802
 803     def get_content_charset(self, failobj=None):
 804         """Return the charset parameter of the Content-Type header.
 805
 806         The returned string is always coerced to lower case.  If there is no
 807         Content-Type header, or if that header has no charset parameter,
 808         failobj is returned.
 809         """
 810         missing = []
 811         charset = self.get_param('charset', missing)
 812         if charset is missing:
 813             return failobj
 814         if isinstance(charset, TupleType):
 815             # RFC 2231 encoded, so decode it, and it better end up as ascii.
 816             pcharset = charset[0] or 'us-ascii'
 817             charset = unicode(charset[2], pcharset).encode('us-ascii')
 818         # RFC 2046, $4.1.2 says charsets are not case sensitive
 819         return charset.lower()
 820
 821     def get_charsets(self, failobj=None):
 822         """Return a list containing the charset(s) used in this message.
 823
 824         The returned list of items describes the Content-Type headers'
 825         charset parameter for this message and all the subparts in its
 826         payload.
 827
 828         Each item will either be a string (the value of the charset parameter
 829         in the Content-Type header of that part) or the value of the
 830         'failobj' parameter (defaults to None), if the part does not have a
 831         main MIME type of "text", or the charset is not defined.
 832
 833         The list will contain one string for each part of the message, plus
 834         one for the container message (i.e. self), so that a non-multipart
 835         message will still return a list of length 1.
 836         """
 837         return [part.get_content_charset(failobj) for part in self.walk()]