Lib/email/Generator.py

   1 # Copyright (C) 2001,2002 Python Software Foundation
   2 # Author: barry@zope.com (Barry Warsaw)
   3
   4 """Classes to generate plain text from a message object tree.
   5 """
   6
   7 import re
   8 import sys
   9 import time
  10 import locale
  11 import random
  12
  13 from types import ListType, StringType
  14 from cStringIO import StringIO
  15
  16 from email.Header import Header
  17 from email.Parser import NLCRE
  18
  19 try:
  20     from email._compat22 import _isstring
  21 except SyntaxError:
  22     from email._compat21 import _isstring
  23
# Interpreters that do not define the names True and False raise NameError
# on the bare reference below; in that case fall back to the integers 1 and
# 0 so the rest of the module can use the names unconditionally.
try:
    True, False
except NameError:
    True = 1
    False = 0

# Small string constants shared by the code in this module.
EMPTYSTRING = ''
SEMISPACE = '; '
BAR = '|'
UNDERSCORE = '_'
NL = '\n'
NLTAB = '\n\t'
SEMINLTAB = ';\n\t'
SPACE8 = ' ' * 8

# Matches the text "From " at the beginning of any line (MULTILINE).  Used
# by Generator._handle_text() to rewrite such lines as ">From " when From_
# mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
  40
  41 def _is8bitstring(s):
  42     if isinstance(s, StringType):
  43         try:
  44             unicode(s, 'us-ascii')
  45         except UnicodeError:
  46             return True
  47     return False
  48
  49
  50 \f
  51 class Generator:
  52     """Generates output from a Message object tree.
  53
  54     This basic generator writes the message to the given file object as plain
  55     text.
  56     """
  57     #
  58     # Public interface
  59     #
  60
  61     def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
  62         """Create the generator for message flattening.
  63
  64         outfp is the output file-like object for writing the message to.  It
  65         must have a write() method.
  66
  67         Optional mangle_from_ is a flag that, when True (the default), escapes
  68         From_ lines in the body of the message by putting a `>' in front of
  69         them.
  70
  71         Optional maxheaderlen specifies the longest length for a non-continued
  72         header.  When a header line is longer (in characters, with tabs
  73         expanded to 8 spaces) than maxheaderlen, the header will split as
  74         defined in the Header class.  Set maxheaderlen to zero to disable
  75         header wrapping.  The default is 78, as recommended (but not required)
  76         by RFC 2822.
  77         """
  78         self._fp = outfp
  79         self._mangle_from_ = mangle_from_
  80         self.__maxheaderlen = maxheaderlen
  81
  82     def write(self, s):
  83         # Just delegate to the file object
  84         self._fp.write(s)
  85
  86     def flatten(self, msg, unixfrom=False):
  87         """Print the message object tree rooted at msg to the output file
  88         specified when the Generator instance was created.
  89
  90         unixfrom is a flag that forces the printing of a Unix From_ delimiter
  91         before the first object in the message tree.  If the original message
  92         has no From_ delimiter, a `standard' one is crafted.  By default, this
  93         is False to inhibit the printing of any From_ delimiter.
  94
  95         Note that for subobjects, no From_ line is printed.
  96         """
  97         if unixfrom:
  98             ufrom = msg.get_unixfrom()
  99             if not ufrom:
 100                 ufrom = 'From nobody ' + time.ctime(time.time())
 101             print >> self._fp, ufrom
 102         self._write(msg)
 103
 104     # For backwards compatibility, but this is slower
 105     __call__ = flatten
 106
 107     def clone(self, fp):
 108         """Clone this generator with the exact same options."""
 109         return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)
 110
 111     #
 112     # Protected interface - undocumented ;/
 113     #
 114
 115     def _write(self, msg):
 116         # We can't write the headers yet because of the following scenario:
 117         # say a multipart message includes the boundary string somewhere in
 118         # its body.  We'd have to calculate the new boundary /before/ we write
 119         # the headers so that we can write the correct Content-Type:
 120         # parameter.
 121         #
 122         # The way we do this, so as to make the _handle_*() methods simpler,
 123         # is to cache any subpart writes into a StringIO.  The we write the
 124         # headers and the StringIO contents.  That way, subpart handlers can
 125         # Do The Right Thing, and can still modify the Content-Type: header if
 126         # necessary.
 127         oldfp = self._fp
 128         try:
 129             self._fp = sfp = StringIO()
 130             self._dispatch(msg)
 131         finally:
 132             self._fp = oldfp
 133         # Write the headers.  First we see if the message object wants to
 134         # handle that itself.  If not, we'll do it generically.
 135         meth = getattr(msg, '_write_headers', None)
 136         if meth is None:
 137             self._write_headers(msg)
 138         else:
 139             meth(self)
 140         self._fp.write(sfp.getvalue())
 141
 142     def _dispatch(self, msg):
 143         # Get the Content-Type: for the message, then try to dispatch to
 144         # self._handle_<maintype>_<subtype>().  If there's no handler for the
 145         # full MIME type, then dispatch to self._handle_<maintype>().  If
 146         # that's missing too, then dispatch to self._writeBody().
 147         main = msg.get_content_maintype()
 148         sub = msg.get_content_subtype()
 149         specific = UNDERSCORE.join((main, sub)).replace('-', '_')
 150         meth = getattr(self, '_handle_' + specific, None)
 151         if meth is None:
 152             generic = main.replace('-', '_')
 153             meth = getattr(self, '_handle_' + generic, None)
 154             if meth is None:
 155                 meth = self._writeBody
 156         meth(msg)
 157
 158     #
 159     # Default handlers
 160     #
 161
 162     def _write_headers(self, msg):
 163         for h, v in msg.items():
 164             print >> self._fp, '%s:' % h,
 165             if self.__maxheaderlen == 0:
 166                 # Explicit no-wrapping
 167                 print >> self._fp, v
 168             elif isinstance(v, Header):
 169                 # Header instances know what to do
 170                 print >> self._fp, v.encode()
 171             elif _is8bitstring(v):
 172                 # If we have raw 8bit data in a byte string, we have no idea
 173                 # what the encoding is.  There is no safe way to split this
 174                 # string.  If it's ascii-subset, then we could do a normal
 175                 # ascii split, but if it's multibyte then we could break the
 176                 # string.  There's no way to know so the least harm seems to
 177                 # be to not split the string and risk it being too long.
 178                 print >> self._fp, v
 179             else:
 180                 # Header's got lots of smarts, so use it.
 181                 print >> self._fp, Header(
 182                     v, maxlinelen=self.__maxheaderlen,
 183                     header_name=h, continuation_ws='\t').encode()
 184         # A blank line always separates headers from body
 185         print >> self._fp
 186
 187     #
 188     # Handlers for writing types and subtypes
 189     #
 190
 191     def _handle_text(self, msg):
 192         payload = msg.get_payload()
 193         if payload is None:
 194             return
 195         cset = msg.get_charset()
 196         if cset is not None:
 197             payload = cset.body_encode(payload)
 198         if not _isstring(payload):
 199             raise TypeError, 'string payload expected: %s' % type(payload)
 200         if self._mangle_from_:
 201             payload = fcre.sub('>From ', payload)
 202         self._fp.write(payload)
 203
 204     # Default body handler
 205     _writeBody = _handle_text
 206
 207     def _handle_multipart(self, msg):
 208         # The trick here is to write out each part separately, merge them all
 209         # together, and then make sure that the boundary we've chosen isn't
 210         # present in the payload.
 211         msgtexts = []
 212         subparts = msg.get_payload()
 213         if subparts is None:
 214             # Nothing has ever been attached
 215             boundary = msg.get_boundary(failobj=_make_boundary())
 216             print >> self._fp, '--' + boundary
 217             print >> self._fp, '\n'
 218             print >> self._fp, '--' + boundary + '--'
 219             return
 220         elif _isstring(subparts):
 221             # e.g. a non-strict parse of a message with no starting boundary.
 222             self._fp.write(subparts)
 223             return
 224         elif not isinstance(subparts, ListType):
 225             # Scalar payload
 226             subparts = [subparts]
 227         for part in subparts:
 228             s = StringIO()
 229             g = self.clone(s)
 230             g.flatten(part, unixfrom=False)
 231             msgtexts.append(s.getvalue())
 232         # Now make sure the boundary we've selected doesn't appear in any of
 233         # the message texts.
 234         alltext = NL.join(msgtexts)
 235         # BAW: What about boundaries that are wrapped in double-quotes?
 236         boundary = msg.get_boundary(failobj=_make_boundary(alltext))
 237         # If we had to calculate a new boundary because the body text
 238         # contained that string, set the new boundary.  We don't do it
 239         # unconditionally because, while set_boundary() preserves order, it
 240         # doesn't preserve newlines/continuations in headers.  This is no big
 241         # deal in practice, but turns out to be inconvenient for the unittest
 242         # suite.
 243         if msg.get_boundary() <> boundary:
 244             msg.set_boundary(boundary)
 245         # Write out any preamble
 246         if msg.preamble is not None:
 247             self._fp.write(msg.preamble)
 248             # If preamble is the empty string, the length of the split will be
 249             # 1, but the last element will be the empty string.  If it's
 250             # anything else but does not end in a line separator, the length
 251             # will be > 1 and not end in an empty string.  We need to
 252             # guarantee a newline after the preamble, but don't add too many.
 253             plines = NLCRE.split(msg.preamble)
 254             if plines <> [''] and plines[-1] <> '':
 255                 self._fp.write('\n')
 256         # First boundary is a bit different; it doesn't have a leading extra
 257         # newline.
 258         print >> self._fp, '--' + boundary
 259         # Join and write the individual parts
 260         joiner = '\n--' + boundary + '\n'
 261         self._fp.write(joiner.join(msgtexts))
 262         print >> self._fp, '\n--' + boundary + '--',
 263         # Write out any epilogue
 264         if msg.epilogue is not None:
 265             if not msg.epilogue.startswith('\n'):
 266                 print >> self._fp
 267             self._fp.write(msg.epilogue)
 268
 269     def _handle_message_delivery_status(self, msg):
 270         # We can't just write the headers directly to self's file object
 271         # because this will leave an extra newline between the last header
 272         # block and the boundary.  Sigh.
 273         blocks = []
 274         for part in msg.get_payload():
 275             s = StringIO()
 276             g = self.clone(s)
 277             g.flatten(part, unixfrom=False)
 278             text = s.getvalue()
 279             lines = text.split('\n')
 280             # Strip off the unnecessary trailing empty line
 281             if lines and lines[-1] == '':
 282                 blocks.append(NL.join(lines[:-1]))
 283             else:
 284                 blocks.append(text)
 285         # Now join all the blocks with an empty line.  This has the lovely
 286         # effect of separating each block with an empty line, but not adding
 287         # an extra one after the last one.
 288         self._fp.write(NL.join(blocks))
 289
 290     def _handle_message(self, msg):
 291         s = StringIO()
 292         g = self.clone(s)
 293         # The payload of a message/rfc822 part should be a multipart sequence
 294         # of length 1.  The zeroth element of the list should be the Message
 295         # object for the subpart.  Extract that object, stringify it, and
 296         # write it out.
 297         g.flatten(msg.get_payload(0), unixfrom=False)
 298         self._fp.write(s.getvalue())
 299
 300
 301 \f
 302 class DecodedGenerator(Generator):
 303     """Generator a text representation of a message.
 304
 305     Like the Generator base class, except that non-text parts are substituted
 306     with a format string representing the part.
 307     """
 308     def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
 309         """Like Generator.__init__() except that an additional optional
 310         argument is allowed.
 311
 312         Walks through all subparts of a message.  If the subpart is of main
 313         type `text', then it prints the decoded payload of the subpart.
 314
 315         Otherwise, fmt is a format string that is used instead of the message
 316         payload.  fmt is expanded with the following keywords (in
 317         %(keyword)s format):
 318
 319         type       : Full MIME type of the non-text part
 320         maintype   : Main MIME type of the non-text part
 321         subtype    : Sub-MIME type of the non-text part
 322         filename   : Filename of the non-text part
 323         description: Description associated with the non-text part
 324         encoding   : Content transfer encoding of the non-text part
 325
 326         The default value for fmt is None, meaning
 327
 328         [Non-text (%(type)s) part of message omitted, filename %(filename)s]
 329         """
 330         Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
 331         if fmt is None:
 332             fmt = ('[Non-text (%(type)s) part of message omitted, '
 333                    'filename %(filename)s]')
 334         self._fmt = fmt
 335
 336     def _dispatch(self, msg):
 337         for part in msg.walk():
 338             maintype = part.get_main_type('text')
 339             if maintype == 'text':
 340                 print >> self, part.get_payload(decode=True)
 341             elif maintype == 'multipart':
 342                 # Just skip this
 343                 pass
 344             else:
 345                 print >> self, self._fmt % {
 346                     'type'       : part.get_type('[no MIME type]'),
 347                     'maintype'   : part.get_main_type('[no main MIME type]'),
 348                     'subtype'    : part.get_subtype('[no sub-MIME type]'),
 349                     'filename'   : part.get_filename('[no filename]'),
 350                     'description': part.get('Content-Description',
 351                                             '[no description]'),
 352                     'encoding'   : part.get('Content-Transfer-Encoding',
 353                                             '[no encoding]'),
 354                     }
 355
 356
 357 \f
 358 # Helper
 359 _width = len(repr(sys.maxint-1))
 360 _fmt = '%%0%dd' % _width
 361
 362 def _make_boundary(text=None):
 363     # Craft a random boundary.  If text is given, ensure that the chosen
 364     # boundary doesn't appear in the text.
 365     token = random.randrange(sys.maxint)
 366     boundary = ('=' * 15) + (_fmt % token) + '=='
 367     if text is None:
 368         return boundary
 369     b = boundary
 370     counter = 0
 371     while True:
 372         cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
 373         if not cre.search(text):
 374             break
 375         b = boundary + '.' + str(counter)
 376         counter += 1
 377     return b