Lib/email/Generator.py

   1 # Copyright (C) 2001 Python Software Foundation
   2 # Author: barry@zope.com (Barry Warsaw)
   3
   4 """Classes to generate plain text from a message object tree.
   5 """
   6
   7 import time
   8 import re
   9 import random
  10
  11 from types import ListType, StringType
  12 from cStringIO import StringIO
  13
  14 # Intrapackage imports
  15 import Message
  16 import Errors
  17
# String constants shared by the generator code below.
EMPTYSTRING = ''        # joiner used when re-assembling split header pieces
SEMISPACE = '; '        # NOTE(review): not referenced in this listing
BAR = '|'               # NOTE(review): not referenced in this listing
UNDERSCORE = '_'        # used to build _handle_<maintype>_<subtype> names
NL = '\n'
NLTAB = '\n\t'          # line break + tab: header folded at whitespace
SEMINLTAB = ';\n\t'     # semicolon + line break + tab: folded at a semicolon
SPACE8 = ' ' * 8        # a hard tab is measured as 8 columns

# Matches "From " at the start of any line (MULTILINE), i.e. the lines that
# get escaped with a leading `>' when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)
  28
  29
  30 \f
  31 class Generator:
  32     """Generates output from a Message object tree.
  33
  34     This basic generator writes the message to the given file object as plain
  35     text.
  36     """
  37     #
  38     # Public interface
  39     #
  40
  41     def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
  42         """Create the generator for message flattening.
  43
  44         outfp is the output file-like object for writing the message to.  It
  45         must have a write() method.
  46
  47         Optional mangle_from_ is a flag that, when true, escapes From_ lines
  48         in the body of the message by putting a `>' in front of them.
  49
  50         Optional maxheaderlen specifies the longest length for a non-continued
  51         header.  When a header line is longer (in characters, with tabs
  52         expanded to 8 spaces), than maxheaderlen, the header will be broken on
  53         semicolons and continued as per RFC 2822.  If no semicolon is found,
  54         then the header is left alone.  Set to zero to disable wrapping
  55         headers.  Default is 78, as recommended (but not required by RFC
  56         2822.
  57         """
  58         self._fp = outfp
  59         self._mangle_from_ = mangle_from_
  60         self.__first = 1
  61         self.__maxheaderlen = maxheaderlen
  62
  63     def write(self, s):
  64         # Just delegate to the file object
  65         self._fp.write(s)
  66
  67     def __call__(self, msg, unixfrom=0):
  68         """Print the message object tree rooted at msg to the output file
  69         specified when the Generator instance was created.
  70
  71         unixfrom is a flag that forces the printing of a Unix From_ delimiter
  72         before the first object in the message tree.  If the original message
  73         has no From_ delimiter, a `standard' one is crafted.  By default, this
  74         is 0 to inhibit the printing of any From_ delimiter.
  75
  76         Note that for subobjects, no From_ line is printed.
  77         """
  78         if unixfrom:
  79             ufrom = msg.get_unixfrom()
  80             if not ufrom:
  81                 ufrom = 'From nobody ' + time.ctime(time.time())
  82             print >> self._fp, ufrom
  83         self._write(msg)
  84
  85     #
  86     # Protected interface - undocumented ;/
  87     #
  88
  89     def _write(self, msg):
  90         # We can't write the headers yet because of the following scenario:
  91         # say a multipart message includes the boundary string somewhere in
  92         # its body.  We'd have to calculate the new boundary /before/ we write
  93         # the headers so that we can write the correct Content-Type:
  94         # parameter.
  95         #
  96         # The way we do this, so as to make the _handle_*() methods simpler,
  97         # is to cache any subpart writes into a StringIO.  The we write the
  98         # headers and the StringIO contents.  That way, subpart handlers can
  99         # Do The Right Thing, and can still modify the Content-Type: header if
 100         # necessary.
 101         oldfp = self._fp
 102         try:
 103             self._fp = sfp = StringIO()
 104             self._dispatch(msg)
 105         finally:
 106             self._fp = oldfp
 107         # Write the headers.  First we see if the message object wants to
 108         # handle that itself.  If not, we'll do it generically.
 109         meth = getattr(msg, '_write_headers', None)
 110         if meth is None:
 111             self._write_headers(msg)
 112         else:
 113             meth(self)
 114         self._fp.write(sfp.getvalue())
 115
 116     def _dispatch(self, msg):
 117         # Get the Content-Type: for the message, then try to dispatch to
 118         # self._handle_maintype_subtype().  If there's no handler for the full
 119         # MIME type, then dispatch to self._handle_maintype().  If that's
 120         # missing too, then dispatch to self._writeBody().
 121         ctype = msg.get_type()
 122         if ctype is None:
 123             # No Content-Type: header so try the default handler
 124             self._writeBody(msg)
 125         else:
 126             # We do have a Content-Type: header.
 127             specific = UNDERSCORE.join(ctype.split('/')).replace('-', '_')
 128             meth = getattr(self, '_handle_' + specific, None)
 129             if meth is None:
 130                 generic = msg.get_main_type().replace('-', '_')
 131                 meth = getattr(self, '_handle_' + generic, None)
 132                 if meth is None:
 133                     meth = self._writeBody
 134             meth(msg)
 135
 136     #
 137     # Default handlers
 138     #
 139
 140     def _write_headers(self, msg):
 141         for h, v in msg.items():
 142             # We only write the MIME-Version: header for the outermost
 143             # container message.  Unfortunately, we can't use same technique
 144             # as for the Unix-From above because we don't know when
 145             # MIME-Version: will occur.
 146             if h.lower() == 'mime-version' and not self.__first:
 147                 continue
 148             # RFC 2822 says that lines SHOULD be no more than maxheaderlen
 149             # characters wide, so we're well within our rights to split long
 150             # headers.
 151             text = '%s: %s' % (h, v)
 152             if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
 153                 text = self._split_header(text)
 154             print >> self._fp, text
 155         # A blank line always separates headers from body
 156         print >> self._fp
 157
 158     def _split_header(self, text):
 159         maxheaderlen = self.__maxheaderlen
 160         # Find out whether any lines in the header are really longer than
 161         # maxheaderlen characters wide.  There could be continuation lines
 162         # that actually shorten it.  Also, replace hard tabs with 8 spaces.
 163         lines = [s.replace('\t', SPACE8) for s in text.split('\n')]
 164         for line in lines:
 165             if len(line) > maxheaderlen:
 166                 break
 167         else:
 168             # No line was actually longer than maxheaderlen characters, so
 169             # just return the original unchanged.
 170             return text
 171         rtn = []
 172         for line in text.split('\n'):
 173             # Short lines can remain unchanged
 174             if len(line.replace('\t', SPACE8)) <= maxheaderlen:
 175                 rtn.append(line)
 176                 SEMINLTAB.join(rtn)
 177             else:
 178                 oldlen = len(text)
 179                 # Try to break the line on semicolons, but if that doesn't
 180                 # work, try to split on folding whitespace.
 181                 while len(text) > maxheaderlen:
 182                     i = text.rfind(';', 0, maxheaderlen)
 183                     if i < 0:
 184                         break
 185                     rtn.append(text[:i])
 186                     text = text[i+1:].lstrip()
 187                 if len(text) <> oldlen:
 188                     # Splitting on semis worked
 189                     rtn.append(text)
 190                     return SEMINLTAB.join(rtn)
 191                 # Splitting on semis didn't help, so try to split on
 192                 # whitespace.
 193                 parts = re.split(r'(\s+)', text)
 194                 # Watch out though for "Header: longnonsplittableline"
 195                 if parts[0].endswith(':') and len(parts) == 3:
 196                     return text
 197                 first = parts.pop(0)
 198                 sublines = [first]
 199                 acc = len(first)
 200                 while parts:
 201                     len0 = len(parts[0])
 202                     len1 = len(parts[1])
 203                     if acc + len0 + len1 < maxheaderlen:
 204                         sublines.append(parts.pop(0))
 205                         sublines.append(parts.pop(0))
 206                         acc += len0 + len1
 207                     else:
 208                         # Split it here, but don't forget to ignore the
 209                         # next whitespace-only part
 210                         rtn.append(EMPTYSTRING.join(sublines))
 211                         del parts[0]
 212                         first = parts.pop(0)
 213                         sublines = [first]
 214                         acc = len(first)
 215                 rtn.append(EMPTYSTRING.join(sublines))
 216                 return NLTAB.join(rtn)
 217
 218     #
 219     # Handlers for writing types and subtypes
 220     #
 221
 222     def _handle_text(self, msg):
 223         payload = msg.get_payload()
 224         if payload is None:
 225             return
 226         if not isinstance(payload, StringType):
 227             raise TypeError, 'string payload expected: %s' % type(payload)
 228         if self._mangle_from_:
 229             payload = fcre.sub('>From ', payload)
 230         self._fp.write(payload)
 231
 232     # Default body handler
 233     _writeBody = _handle_text
 234
 235     def _handle_multipart(self, msg, isdigest=0):
 236         # The trick here is to write out each part separately, merge them all
 237         # together, and then make sure that the boundary we've chosen isn't
 238         # present in the payload.
 239         msgtexts = []
 240         # BAW: kludge for broken add_payload() semantics; watch out for
 241         # multipart/* MIME types with None or scalar payloads.
 242         subparts = msg.get_payload()
 243         if subparts is None:
 244             # Nothing has every been attached
 245             boundary = msg.get_boundary(failobj=_make_boundary())
 246             print >> self._fp, '--' + boundary
 247             print >> self._fp, '\n'
 248             print >> self._fp, '--' + boundary + '--'
 249             return
 250         elif not isinstance(subparts, ListType):
 251             # Scalar payload
 252             subparts = [subparts]
 253         for part in subparts:
 254             s = StringIO()
 255             g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
 256             g(part, unixfrom=0)
 257             msgtexts.append(s.getvalue())
 258         # Now make sure the boundary we've selected doesn't appear in any of
 259         # the message texts.
 260         alltext = NL.join(msgtexts)
 261         # BAW: What about boundaries that are wrapped in double-quotes?
 262         boundary = msg.get_boundary(failobj=_make_boundary(alltext))
 263         # If we had to calculate a new boundary because the body text
 264         # contained that string, set the new boundary.  We don't do it
 265         # unconditionally because, while set_boundary() preserves order, it
 266         # doesn't preserve newlines/continuations in headers.  This is no big
 267         # deal in practice, but turns out to be inconvenient for the unittest
 268         # suite.
 269         if msg.get_boundary() <> boundary:
 270             msg.set_boundary(boundary)
 271         # Write out any preamble
 272         if msg.preamble is not None:
 273             self._fp.write(msg.preamble)
 274         # First boundary is a bit different; it doesn't have a leading extra
 275         # newline.
 276         print >> self._fp, '--' + boundary
 277         if isdigest:
 278             print >> self._fp
 279         # Join and write the individual parts
 280         joiner = '\n--' + boundary + '\n'
 281         if isdigest:
 282             # multipart/digest types effectively add an extra newline between
 283             # the boundary and the body part.
 284             joiner += '\n'
 285         self._fp.write(joiner.join(msgtexts))
 286         print >> self._fp, '\n--' + boundary + '--',
 287         # Write out any epilogue
 288         if msg.epilogue is not None:
 289             if not msg.epilogue.startswith('\n'):
 290                 print >> self._fp
 291             self._fp.write(msg.epilogue)
 292
 293     def _handle_multipart_digest(self, msg):
 294         self._handle_multipart(msg, isdigest=1)
 295
 296     def _handle_message_delivery_status(self, msg):
 297         # We can't just write the headers directly to self's file object
 298         # because this will leave an extra newline between the last header
 299         # block and the boundary.  Sigh.
 300         blocks = []
 301         for part in msg.get_payload():
 302             s = StringIO()
 303             g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
 304             g(part, unixfrom=0)
 305             text = s.getvalue()
 306             lines = text.split('\n')
 307             # Strip off the unnecessary trailing empty line
 308             if lines and lines[-1] == '':
 309                 blocks.append(NL.join(lines[:-1]))
 310             else:
 311                 blocks.append(text)
 312         # Now join all the blocks with an empty line.  This has the lovely
 313         # effect of separating each block with an empty line, but not adding
 314         # an extra one after the last one.
 315         self._fp.write(NL.join(blocks))
 316
 317     def _handle_message(self, msg):
 318         s = StringIO()
 319         g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
 320         # A message/rfc822 should contain a scalar payload which is another
 321         # Message object.  Extract that object, stringify it, and write that
 322         # out.
 323         g(msg.get_payload(), unixfrom=0)
 324         self._fp.write(s.getvalue())
 325
 326
 327 \f
 328 class DecodedGenerator(Generator):
 329     """Generator a text representation of a message.
 330
 331     Like the Generator base class, except that non-text parts are substituted
 332     with a format string representing the part.
 333     """
 334     def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
 335         """Like Generator.__init__() except that an additional optional
 336         argument is allowed.
 337
 338         Walks through all subparts of a message.  If the subpart is of main
 339         type `text', then it prints the decoded payload of the subpart.
 340
 341         Otherwise, fmt is a format string that is used instead of the message
 342         payload.  fmt is expanded with the following keywords (in
 343         %(keyword)s format):
 344
 345         type       : Full MIME type of the non-text part
 346         maintype   : Main MIME type of the non-text part
 347         subtype    : Sub-MIME type of the non-text part
 348         filename   : Filename of the non-text part
 349         description: Description associated with the non-text part
 350         encoding   : Content transfer encoding of the non-text part
 351
 352         The default value for fmt is None, meaning
 353
 354         [Non-text (%(type)s) part of message omitted, filename %(filename)s]
 355         """
 356         Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
 357         if fmt is None:
 358             fmt = ('[Non-text (%(type)s) part of message omitted, '
 359                    'filename %(filename)s]')
 360         self._fmt = fmt
 361
 362     def _dispatch(self, msg):
 363         for part in msg.walk():
 364             maintype = part.get_main_type('text')
 365             if maintype == 'text':
 366                 print >> self, part.get_payload(decode=1)
 367             elif maintype == 'multipart':
 368                 # Just skip this
 369                 pass
 370             else:
 371                 print >> self, self._fmt % {
 372                     'type'       : part.get_type('[no MIME type]'),
 373                     'maintype'   : part.get_main_type('[no main MIME type]'),
 374                     'subtype'    : part.get_subtype('[no sub-MIME type]'),
 375                     'filename'   : part.get_filename('[no filename]'),
 376                     'description': part.get('Content-Description',
 377                                             '[no description]'),
 378                     'encoding'   : part.get('Content-Transfer-Encoding',
 379                                             '[no encoding]'),
 380                     }
 381
 382
 383 \f
 384 # Helper
 385 def _make_boundary(text=None):
 386     # Craft a random boundary.  If text is given, ensure that the chosen
 387     # boundary doesn't appear in the text.
 388     boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
 389     if text is None:
 390         return boundary
 391     b = boundary
 392     counter = 0
 393     while 1:
 394         cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
 395         if not cre.search(text):
 396             break
 397         b = boundary + '.' + str(counter)
 398         counter += 1
 399     return b