This commit was manufactured by cvs2svn to create tag 'r234c1'.
[python/dscho.git] / Lib / email / Generator.py
blob56d44ea521765ba8abfce58b33bb50bd65448430
1 # Copyright (C) 2001,2002 Python Software Foundation
2 # Author: barry@zope.com (Barry Warsaw)
4 """Classes to generate plain text from a message object tree.
5 """
7 import re
8 import sys
9 import time
10 import locale
11 import random
13 from types import ListType, StringType
14 from cStringIO import StringIO
16 from email.Header import Header
17 from email.Parser import NLCRE
19 try:
20 from email._compat22 import _isstring
21 except SyntaxError:
22 from email._compat21 import _isstring
24 try:
25 True, False
26 except NameError:
27 True = 1
28 False = 0
30 EMPTYSTRING = ''
31 SEMISPACE = '; '
32 BAR = '|'
33 UNDERSCORE = '_'
34 NL = '\n'
35 NLTAB = '\n\t'
36 SEMINLTAB = ';\n\t'
37 SPACE8 = ' ' * 8
39 fcre = re.compile(r'^From ', re.MULTILINE)
41 def _is8bitstring(s):
42 if isinstance(s, StringType):
43 try:
44 unicode(s, 'us-ascii')
45 except UnicodeError:
46 return True
47 return False
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """

    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.__maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    __call__ = flatten

    def clone(self, fp):
        """Clone this generator with the exact same options.

        fp is the output file object the new generator writes to.
        """
        return self.__class__(fp, self._mangle_from_, self.__maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers first, then body) to the output file object."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output object, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Route msg to the most specific body handler available."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in identifiers, so they map to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by the blank separator line."""
        for h, v in msg.items():
            # The trailing comma keeps the value on the same line as the name.
            print >> self._fp, '%s:' % h,
            if self.__maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self.__maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the payload of msg as text.

        A payload of None writes nothing.  When the message has a charset,
        the payload is first passed through its body_encode().  Raises
        TypeError if the resulting payload is not a string according to
        _isstring().
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not _isstring(payload):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the body of a multipart message.

        If msg has no boundary parameter yet, one is generated and recorded
        on msg with set_boundary() so that the headers written afterwards
        declare the same delimiter that is used in the body.
        """
        # Sentinel that lets us tell "no boundary" apart from any real value.
        missing = []
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            # Nothing has ever been attached
            boundary = msg.get_boundary(failobj=missing)
            if boundary is missing:
                boundary = _make_boundary()
                # Record the new boundary; otherwise the Content-Type: header
                # would not name the delimiter written below.
                msg.set_boundary(boundary)
            print >> self._fp, '--' + boundary
            print >> self._fp, '\n'
            print >> self._fp, '--' + boundary + '--'
            return
        elif _isstring(subparts):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, ListType):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary(failobj=missing)
        if boundary is missing:
            # Only now do we need a fresh boundary, so only now pay for
            # joining the texts and scanning them for a collision.  We don't
            # call set_boundary() unconditionally because, while it preserves
            # order, it doesn't preserve newlines/continuations in headers.
            # This is no big deal in practice, but turns out to be
            # inconvenient for the unittest suite.
            alltext = NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # Write out any preamble
        if msg.preamble is not None:
            self._fp.write(msg.preamble)
            # If preamble is the empty string, the length of the split will be
            # 1, but the last element will be the empty string.  If it's
            # anything else but does not end in a line separator, the length
            # will be > 1 and not end in an empty string.  We need to
            # guarantee a newline after the preamble, but don't add too many.
            plines = NLCRE.split(msg.preamble)
            if plines != [''] and plines[-1] != '':
                self._fp.write('\n')
        # First boundary is a bit different; it doesn't have a leading extra
        # newline.
        print >> self._fp, '--' + boundary
        # Join and write the individual parts
        joiner = '\n--' + boundary + '\n'
        self._fp.write(joiner.join(msgtexts))
        # Trailing comma: no newline after the closing delimiter yet.
        print >> self._fp, '\n--' + boundary + '--',
        # Write out any epilogue
        if msg.epilogue is not None:
            if not msg.epilogue.startswith('\n'):
                print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each sub-block of msg's payload, separated by newlines."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Flatten the first payload element of msg and write it out."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        g.flatten(msg.get_payload(0), unixfrom=False)
        self._fp.write(s.getvalue())
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    # NOTE(review): clone() is inherited from Generator and passes only the
    # output file, mangle_from_ and maxheaderlen, so a clone falls back to the
    # default fmt rather than copying this instance's one.

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            fmt = ('[Non-text (%(type)s) part of message omitted, '
                   'filename %(filename)s]')
        self._fmt = fmt

    def _dispatch(self, msg):
        """Print decoded text parts and a placeholder for every other part.

        Multipart containers themselves produce no output; their children
        are reached through msg.walk().
        """
        for part in msg.walk():
            # Use the same get_content_*() accessors as Generator._dispatch()
            # rather than the older get_type()/get_main_type()/get_subtype().
            # NOTE(review): presumably these fall back to a default type when
            # the Content-Type header is missing or malformed -- confirm
            # against the Message class.
            maintype = part.get_content_maintype()
            if maintype == 'text':
                # Output goes through self.write(), i.e. to the current _fp.
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : maintype,
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }
358 # Helper
359 _width = len(repr(sys.maxint-1))
360 _fmt = '%%0%dd' % _width
362 def _make_boundary(text=None):
363 # Craft a random boundary. If text is given, ensure that the chosen
364 # boundary doesn't appear in the text.
365 token = random.randrange(sys.maxint)
366 boundary = ('=' * 15) + (_fmt % token) + '=='
367 if text is None:
368 return boundary
369 b = boundary
370 counter = 0
371 while True:
372 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
373 if not cre.search(text):
374 break
375 b = boundary + '.' + str(counter)
376 counter += 1
377 return b