1 # Copyright (C) 2001,2002 Python Software Foundation
2 # Author: barry@zope.com (Barry Warsaw)
4 """Classes to generate plain text from a message object tree.
13 from types
import ListType
, StringType
14 from cStringIO
import StringIO
16 from email
.Header
import Header
17 from email
.Parser
import NLCRE
20 from email
._compat
22 import _isstring
22 from email
._compat
21 import _isstring
39 fcre
= re
.compile(r
'^From ', re
.MULTILINE
)
42 if isinstance(s
, StringType
):
44 unicode(s
, 'us-ascii')
52 """Generates output from a Message object tree.
54 This basic generator writes the message to the given file object as plain
61 def __init__(self
, outfp
, mangle_from_
=True, maxheaderlen
=78):
62 """Create the generator for message flattening.
64 outfp is the output file-like object for writing the message to. It
65 must have a write() method.
67 Optional mangle_from_ is a flag that, when True (the default), escapes
68 From_ lines in the body of the message by putting a `>' in front of
71 Optional maxheaderlen specifies the longest length for a non-continued
72 header. When a header line is longer (in characters, with tabs
73 expanded to 8 spaces) than maxheaderlen, the header will split as
74 defined in the Header class. Set maxheaderlen to zero to disable
75 header wrapping. The default is 78, as recommended (but not required)
79 self
._mangle
_from
_ = mangle_from_
80 self
.__maxheaderlen
= maxheaderlen
83 # Just delegate to the file object
86 def flatten(self
, msg
, unixfrom
=False):
87 """Print the message object tree rooted at msg to the output file
88 specified when the Generator instance was created.
90 unixfrom is a flag that forces the printing of a Unix From_ delimiter
91 before the first object in the message tree. If the original message
92 has no From_ delimiter, a `standard' one is crafted. By default, this
93 is False to inhibit the printing of any From_ delimiter.
95 Note that for subobjects, no From_ line is printed.
98 ufrom
= msg
.get_unixfrom()
100 ufrom
= 'From nobody ' + time
.ctime(time
.time())
101 print >> self
._fp
, ufrom
104 # For backwards compatibility, but this is slower
108 """Clone this generator with the exact same options."""
109 return self
.__class
__(fp
, self
._mangle
_from
_, self
.__maxheaderlen
)
112 # Protected interface - undocumented ;/
115 def _write(self
, msg
):
116 # We can't write the headers yet because of the following scenario:
117 # say a multipart message includes the boundary string somewhere in
118 # its body. We'd have to calculate the new boundary /before/ we write
119 # the headers so that we can write the correct Content-Type:
122 # The way we do this, so as to make the _handle_*() methods simpler,
123 # is to cache any subpart writes into a StringIO. Then we write the
124 # headers and the StringIO contents. That way, subpart handlers can
125 # Do The Right Thing, and can still modify the Content-Type: header if
129 self
._fp
= sfp
= StringIO()
133 # Write the headers. First we see if the message object wants to
134 # handle that itself. If not, we'll do it generically.
135 meth
= getattr(msg
, '_write_headers', None)
137 self
._write
_headers
(msg
)
140 self
._fp
.write(sfp
.getvalue())
142 def _dispatch(self
, msg
):
143 # Get the Content-Type: for the message, then try to dispatch to
144 # self._handle_<maintype>_<subtype>(). If there's no handler for the
145 # full MIME type, then dispatch to self._handle_<maintype>(). If
146 # that's missing too, then dispatch to self._writeBody().
147 main
= msg
.get_content_maintype()
148 sub
= msg
.get_content_subtype()
149 specific
= UNDERSCORE
.join((main
, sub
)).replace('-', '_')
150 meth
= getattr(self
, '_handle_' + specific
, None)
152 generic
= main
.replace('-', '_')
153 meth
= getattr(self
, '_handle_' + generic
, None)
155 meth
= self
._writeBody
162 def _write_headers(self
, msg
):
163 for h
, v
in msg
.items():
164 print >> self
._fp
, '%s:' % h
,
165 if self
.__maxheaderlen
== 0:
166 # Explicit no-wrapping
168 elif isinstance(v
, Header
):
169 # Header instances know what to do
170 print >> self
._fp
, v
.encode()
171 elif _is8bitstring(v
):
172 # If we have raw 8bit data in a byte string, we have no idea
173 # what the encoding is. There is no safe way to split this
174 # string. If it's ascii-subset, then we could do a normal
175 # ascii split, but if it's multibyte then we could break the
176 # string. There's no way to know so the least harm seems to
177 # be to not split the string and risk it being too long.
180 # Header's got lots of smarts, so use it.
181 print >> self
._fp
, Header(
182 v
, maxlinelen
=self
.__maxheaderlen
,
183 header_name
=h
, continuation_ws
='\t').encode()
184 # A blank line always separates headers from body
188 # Handlers for writing types and subtypes
191 def _handle_text(self
, msg
):
192 payload
= msg
.get_payload()
195 cset
= msg
.get_charset()
197 payload
= cset
.body_encode(payload
)
198 if not _isstring(payload
):
199 raise TypeError, 'string payload expected: %s' % type(payload
)
200 if self
._mangle
_from
_:
201 payload
= fcre
.sub('>From ', payload
)
202 self
._fp
.write(payload
)
204 # Default body handler
205 _writeBody
= _handle_text
207 def _handle_multipart(self
, msg
):
208 # The trick here is to write out each part separately, merge them all
209 # together, and then make sure that the boundary we've chosen isn't
210 # present in the payload.
212 subparts
= msg
.get_payload()
214 # Nothing has ever been attached
215 boundary
= msg
.get_boundary(failobj
=_make_boundary())
216 print >> self
._fp
, '--' + boundary
217 print >> self
._fp
, '\n'
218 print >> self
._fp
, '--' + boundary
+ '--'
220 elif _isstring(subparts
):
221 # e.g. a non-strict parse of a message with no starting boundary.
222 self
._fp
.write(subparts
)
224 elif not isinstance(subparts
, ListType
):
226 subparts
= [subparts
]
227 for part
in subparts
:
230 g
.flatten(part
, unixfrom
=False)
231 msgtexts
.append(s
.getvalue())
232 # Now make sure the boundary we've selected doesn't appear in any of
234 alltext
= NL
.join(msgtexts
)
235 # BAW: What about boundaries that are wrapped in double-quotes?
236 boundary
= msg
.get_boundary(failobj
=_make_boundary(alltext
))
237 # If we had to calculate a new boundary because the body text
238 # contained that string, set the new boundary. We don't do it
239 # unconditionally because, while set_boundary() preserves order, it
240 # doesn't preserve newlines/continuations in headers. This is no big
241 # deal in practice, but turns out to be inconvenient for the unittest
243 if msg
.get_boundary() <> boundary
:
244 msg
.set_boundary(boundary
)
245 # Write out any preamble
246 if msg
.preamble
is not None:
247 self
._fp
.write(msg
.preamble
)
248 # If preamble is the empty string, the length of the split will be
249 # 1, but the last element will be the empty string. If it's
250 # anything else but does not end in a line separator, the length
251 # will be > 1 and not end in an empty string. We need to
252 # guarantee a newline after the preamble, but don't add too many.
253 plines
= NLCRE
.split(msg
.preamble
)
254 if plines
<> [''] and plines
[-1] <> '':
256 # First boundary is a bit different; it doesn't have a leading extra
258 print >> self
._fp
, '--' + boundary
259 # Join and write the individual parts
260 joiner
= '\n--' + boundary
+ '\n'
261 self
._fp
.write(joiner
.join(msgtexts
))
262 print >> self
._fp
, '\n--' + boundary
+ '--',
263 # Write out any epilogue
264 if msg
.epilogue
is not None:
265 if not msg
.epilogue
.startswith('\n'):
267 self
._fp
.write(msg
.epilogue
)
269 def _handle_message_delivery_status(self
, msg
):
270 # We can't just write the headers directly to self's file object
271 # because this will leave an extra newline between the last header
272 # block and the boundary. Sigh.
274 for part
in msg
.get_payload():
277 g
.flatten(part
, unixfrom
=False)
279 lines
= text
.split('\n')
280 # Strip off the unnecessary trailing empty line
281 if lines
and lines
[-1] == '':
282 blocks
.append(NL
.join(lines
[:-1]))
285 # Now join all the blocks with an empty line. This has the lovely
286 # effect of separating each block with an empty line, but not adding
287 # an extra one after the last one.
288 self
._fp
.write(NL
.join(blocks
))
290 def _handle_message(self
, msg
):
293 # The payload of a message/rfc822 part should be a multipart sequence
294 # of length 1. The zeroth element of the list should be the Message
295 # object for the subpart. Extract that object, stringify it, and
297 g
.flatten(msg
.get_payload(0), unixfrom
=False)
298 self
._fp
.write(s
.getvalue())
302 class DecodedGenerator(Generator
):
303 """Generates a text representation of a message.
305 Like the Generator base class, except that non-text parts are substituted
306 with a format string representing the part.
308 def __init__(self
, outfp
, mangle_from_
=True, maxheaderlen
=78, fmt
=None):
309 """Like Generator.__init__() except that an additional optional
312 Walks through all subparts of a message. If the subpart is of main
313 type `text', then it prints the decoded payload of the subpart.
315 Otherwise, fmt is a format string that is used instead of the message
316 payload. fmt is expanded with the following keywords (in
319 type : Full MIME type of the non-text part
320 maintype : Main MIME type of the non-text part
321 subtype : Sub-MIME type of the non-text part
322 filename : Filename of the non-text part
323 description: Description associated with the non-text part
324 encoding : Content transfer encoding of the non-text part
326 The default value for fmt is None, meaning
328 [Non-text (%(type)s) part of message omitted, filename %(filename)s]
330 Generator
.__init
__(self
, outfp
, mangle_from_
, maxheaderlen
)
332 fmt
= ('[Non-text (%(type)s) part of message omitted, '
333 'filename %(filename)s]')
336 def _dispatch(self
, msg
):
337 for part
in msg
.walk():
338 maintype
= part
.get_main_type('text')
339 if maintype
== 'text':
340 print >> self
, part
.get_payload(decode
=True)
341 elif maintype
== 'multipart':
345 print >> self
, self
._fmt
% {
346 'type' : part
.get_type('[no MIME type]'),
347 'maintype' : part
.get_main_type('[no main MIME type]'),
348 'subtype' : part
.get_subtype('[no sub-MIME type]'),
349 'filename' : part
.get_filename('[no filename]'),
350 'description': part
.get('Content-Description',
352 'encoding' : part
.get('Content-Transfer-Encoding',
359 _width
= len(repr(sys
.maxint
-1))
360 _fmt
= '%%0%dd' % _width
362 def _make_boundary(text
=None):
363 # Craft a random boundary. If text is given, ensure that the chosen
364 # boundary doesn't appear in the text.
365 token
= random
.randrange(sys
.maxint
)
366 boundary
= ('=' * 15) + (_fmt
% token
) + '=='
372 cre
= re
.compile('^--' + re
.escape(b
) + '(--)?$', re
.MULTILINE
)
373 if not cre
.search(text
):
375 b
= boundary
+ '.' + str(counter
)