Bump version to 0.9.1.
[python/dscho.git] / Lib / mimify.py
blob20b4d6c0873718d695b264b8ec3d928785ec4b8c
1 #! /usr/bin/env python
3 """Mimification and unmimification of mail messages.
5 Decode quoted-printable parts of a mail message or encode using
6 quoted-printable.
8 Usage:
9 mimify(input, output)
10 unmimify(input, output, decode_base64 = 0)
11 to encode and decode respectively. Input and output may be the name
12 of a file or an open file object. Only a readline() method is used
13 on the input file, only a write() method is used on the output file.
14 When using file names, the input and output file names may be the
15 same.
17 Interactive usage:
18 mimify.py -e [infile [outfile]]
19 mimify.py -d [infile [outfile]]
20 to encode and decode respectively. Infile defaults to standard
21 input and outfile to standard output.
22 """
# Configure

# Body lines longer than this force quoted-printable encoding.
MAXLEN = 200

# Default charset announced for mail that is not plain US-ASCII.
CHARSET = 'ISO-8859-1'

# The string that replies are quoted with.
QUOTE = '> '

# End configure
30 import re, string
# Header "Content-Transfer-Encoding: quoted-printable" (case-insensitive).
qp = re.compile(r'^content-transfer-encoding:\s*quoted-printable',
                re.IGNORECASE)
# Header "Content-Transfer-Encoding: base64" (case-insensitive).
base64_re = re.compile(r'^content-transfer-encoding:\s*base64',
                       re.IGNORECASE)
# Multipart Content-Type header; group 1 is the boundary parameter.
# DOTALL lets '.*' run across the newlines of a folded header.
mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)',
                re.IGNORECASE | re.DOTALL)
# Content-Type header with a quoted us-ascii/iso-8859-N charset:
# group 1 = text up to and including the opening quote,
# group 2 = the charset name, group 3 = the closing quote and the rest.
chrset = re.compile(
    '^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)',
    re.IGNORECASE | re.DOTALL)
# End of header: an empty line or a line consisting only of dashes.
he = re.compile('^-*\n')
# A quoted-printable =XX escape; group 1 holds the two hex digits.
mime_code = re.compile('=([0-9a-f][0-9a-f])', re.IGNORECASE)
# An "=?iso-8859-1?q?...?=" encoded word; group 1 is the encoded text.
mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=',
                       re.IGNORECASE)
# Subject line of a reply ("Subject: Re: ...").
repl = re.compile(r'^subject:\s+re: ', re.IGNORECASE)
class File:
    """Fake file object that stops reading at a MIME boundary.

    Only readline() is supported.  When the wrapped file yields the
    boundary line (boundary + newline) or the closing boundary line
    (boundary + '--' + newline), that line is stored in the `peek`
    attribute and readline() returns '' from then on, as if the end
    of the file had been reached.
    """

    def __init__(self, file, boundary):
        self.file = file            # underlying object; needs readline()
        self.boundary = boundary    # boundary line without newline, or None
        self.peek = None            # the boundary line once it has been read

    def readline(self):
        """Return the next line, or '' at end of file or at a boundary."""
        # A boundary was already seen: this part is exhausted.
        if self.peek is not None:
            return ''
        text = self.file.readline()
        if text and self.boundary:
            separator = self.boundary + '\n'
            terminator = self.boundary + '--\n'
            if text == separator or text == terminator:
                # Keep the boundary line so the caller can inspect it.
                self.peek = text
                return ''
        return text
class HeaderFile:
    """Reader that returns complete, unfolded mail header lines.

    Continuation lines (lines starting with a space or a tab) are
    appended to the header line they belong to, newlines included.
    Only readline() is supported.
    """

    def __init__(self, file):
        self.file = file    # underlying object; needs readline()
        self.peek = None    # line read ahead but not yet returned

    def readline(self):
        """Return the next header line together with its continuations."""
        if self.peek is None:
            header = self.file.readline()
        else:
            # Use the line that was read ahead on the previous call.
            header, self.peek = self.peek, None
        # End of file and the end-of-header line are returned unchanged.
        if not header or he.match(header):
            return header
        while 1:
            following = self.file.readline()
            if following[:1] not in (' ', '\t'):
                # Not a continuation (or end of file): keep it for later.
                self.peek = following
                return header
            header = header + following
def mime_decode(line):
    """Decode a single line of quoted-printable text to 8bit.

    Every '=XX' escape (two hex digits, either case) found by the
    module-level `mime_code` pattern is replaced by the character with
    that code.  Everything else, including a trailing soft line break,
    is returned unchanged.  Decoded output is not rescanned.
    """
    # int(..., 16) replaces the deprecated string.atoi(); a single sub()
    # avoids rebuilding the result string once per escape.
    return mime_code.sub(lambda res: chr(int(res.group(1), 16)), line)
def mime_decode_header(line):
    """Decode a header line to 8bit.

    Each encoded word matched by the module-level `mime_head` pattern
    ('=?iso-8859-1?q?...?=', case-insensitive) is replaced by its
    decoded text.  Text outside encoded words is returned unchanged.
    """
    def _decode_word(res):
        # Convert underscores to spaces first (before =XX conversion!),
        # so that an encoded underscore (=5F) survives as an underscore.
        return mime_decode(res.group(1).replace('_', ' '))

    return mime_head.sub(_decode_word, line)
def unmimify_part(ifile, ofile, decode_base64 = 0):
    """Convert a quoted-printable part of a MIME mail message to 8bit.

    ifile -- a File instance wrapping the input; its `boundary`
             attribute is inspected and its readline() is used.
    ofile -- output object; only write() is used.
    decode_base64 -- if true, bodies announced as base64 are decoded too.

    The header is copied with encoded words decoded.  The
    Content-Transfer-Encoding header is dropped when it announces
    quoted-printable, or base64 while decode_base64 is set.  The body is
    then copied, decoding as announced; parts of a multipart message
    are handled by recursive calls.
    """
    multipart = None
    quoted_printable = 0
    is_base64 = 0
    is_repl = 0
    # A boundary that starts with the quote string means this part sits
    # inside a quoted reply, so its lines carry the quote prefix.
    # Compare len(QUOTE) characters rather than a hard-coded 2 so that
    # changing the QUOTE setting keeps this test working.
    if ifile.boundary and ifile.boundary[:len(QUOTE)] == QUOTE:
        prefix = QUOTE
    else:
        prefix = ''

    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            return
        # Strip the quote prefix while processing; it is written back below.
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
        line = mime_decode_header(line)
        if qp.match(line):
            quoted_printable = 1
            continue        # skip this header
        if decode_base64 and base64_re.match(line):
            is_base64 = 1
            continue
        ofile.write(pref + line)
        if not prefix and repl.match(line):
            # we're dealing with a reply message
            is_repl = 1
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            break
    if is_repl and (quoted_printable or multipart):
        is_repl = 0

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            return
        # Unwrap encoded words in the body, keeping only the encoded text.
        line = re.sub(mime_head, '\\1', line)
        if prefix and line[:len(prefix)] == prefix:
            line = line[len(prefix):]
            pref = prefix
        else:
            pref = ''
##        if is_repl and len(line) >= 4 and line[:4] == QUOTE+'--' and line[-3:] != '--\n':
##            multipart = line[:-1]
        while multipart:
            if line == multipart + '--\n':
                # Closing boundary: write it and leave multipart mode.
                ofile.write(pref + line)
                multipart = None
                line = None
                break
            if line == multipart + '\n':
                # Start of a part: process it recursively, then carry on
                # with the boundary line on which the part stopped.
                ofile.write(pref + line)
                nifile = File(ifile, multipart)
                unmimify_part(nifile, ofile, decode_base64)
                line = nifile.peek
                if not line:
                    # premature end of file
                    break
                continue
            # not a boundary between parts
            break
        if line and quoted_printable:
            # Join soft line breaks ('=' at the end of the line) first.
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        if line and is_base64 and not pref:
            import base64
            line = base64.decodestring(line)
        if line:
            ofile.write(pref + line)
def unmimify(infile, outfile, decode_base64 = 0):
    """Convert quoted-printable parts of a MIME mail message to 8bit.

    infile and outfile may each be a file name or an open file object.
    Only readline() is used on the input and only write() and flush()
    on the output.  If both are the same file name, the original file
    is first renamed to ',' + its base name in the same directory and a
    new file is written under the original name.  If decode_base64 is
    true, base64 parts are decoded as well.

    Files opened here from a name are closed before returning, also
    when an error occurs; file objects passed in by the caller are
    flushed (the output) but never closed.
    """
    opened = []
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                # Keep the original under a ',' name; ifile stays open on it.
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        unmimify_part(nifile, ofile, decode_base64)
        ofile.flush()
    finally:
        for fileobj in opened:
            fileobj.close()
mime_char = re.compile('[=\177-\377]')          # quote these chars in body
mime_header_char = re.compile('[=?\177-\377]')  # quote these in header

def _mime_quote(res):
    """Return the upper-case '=XX' escape for the one matched character."""
    return '=%02X' % ord(res.group(0))

def mime_encode(line, header):
    """Code a single line as quoted-printable.

    If header is set, quote some extra characters.

    line   -- the text to encode, normally including its newline.
    header -- if true, '?' is quoted in addition to '=' and the
              characters \\177-\\377.

    Returns the encoded text.  Encoded text of 75 characters or more is
    folded with soft line breaks ('=' followed by a newline), never
    splitting an '=XX' escape.
    """
    if header:
        reg = mime_header_char
    else:
        reg = mime_char
    if line[:5] == 'From ':
        # quote 'From ' at the start of a line for stupid mailers
        line = '=46' + reg.sub(_mime_quote, line[1:])
    else:
        line = reg.sub(_mime_quote, line)

    chunks = []
    while len(line) >= 75:
        i = 73
        # Step back until the cut no longer falls inside an =XX escape.
        while line[i] == '=' or line[i-1] == '=':
            i = i - 1
        i = i + 1
        chunks.append(line[:i] + '=\n')
        line = line[i:]
    chunks.append(line)
    return ''.join(chunks)
# A word in a header that contains at least one 8-bit character:
# group 1 = the delimiter before the word (space, tab, '(' or start of
#           the string),
# group 2 = the word itself,
# group 3 = the delimiter after the word (space, tab, ')' or newline).
# The trailing delimiter is only looked at, not consumed, so that it can
# also serve as the leading delimiter of the next word.  Without this,
# the second of two 8-bit words separated by a single space was left
# unencoded.
mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=([ \t)]|\n))')

def mime_encode_header(line):
    """Code a single header line as quoted-printable.

    Every word containing an 8-bit character (as matched by the
    module-level `mime_header` pattern) is replaced by an encoded word
    of the form '=?CHARSET?Q?...?='.  The delimiters around the word and
    all other text are returned unchanged.
    """
    def _encode_word(res):
        return '%s=?%s?Q?%s?=' % (res.group(1), CHARSET,
                                  mime_encode(res.group(2), 1))

    return mime_header.sub(_encode_word, line)
# "Mime-Version:" header (case-insensitive).
mv = re.compile('^mime-version:', re.IGNORECASE)
# Any "Content-Transfer-Encoding:" header (case-insensitive).
cte = re.compile('^content-transfer-encoding:', re.IGNORECASE)
# Any character in the range \177-\377.
iso_char = re.compile('[\177-\377]')
def mimify_part(ifile, ofile, is_mime):
    """Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile   -- input object; only readline() is used.  Nested parts are
               read through a File wrapper around it.
    ofile   -- output object; only write() is used.
    is_mime -- true if the message is already known to be a MIME
               message; set to 1 for nested parts and whenever a
               Mime-Version header is seen.

    The header and body of the part are read completely before anything
    is written, because the header that is written depends on what the
    body contains.
    """
    # What the header announces about the body.
    has_cte = is_qp = is_base64 = 0
    # '--' + boundary when the Content-Type is multipart, else None.
    multipart = None
    # What has to be done to the header and the body on output.
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []         # header lines (continuations already joined)
    header_end = ''     # the line that ended the header, '' at end of file
    message = []        # body lines, quoted-printable already decoded
    message_end = ''    # the boundary line that ended the body, if any
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line):
            must_quote_header = 1
        if mv.match(line):
            is_mime = 1
        if cte.match(line):
            has_cte = 1
            if qp.match(line):
                is_qp = 1
            elif base64_re.match(line):
                is_base64 = 1
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            # The end-of-header line is kept apart from the header lines.
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart:
            # Either kind of boundary line ends the body of this part.
            if line == multipart + '--\n':
                message_end = line
                break
            if line == multipart + '\n':
                message_end = line
                break
        if is_base64:
            # base64 bodies are passed through untouched.
            message.append(line)
            continue
        if is_qp:
            # Already quoted-printable: join soft line breaks and decode,
            # so the line can be inspected (and re-encoded) as 8bit text.
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line):
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        chrset_res = chrset.match(line)
        if chrset_res:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if string.lower(chrset_res.group(2)) == 'us-ascii':
                    line = '%s%s%s' % (chrset_res.group(1),
                                       CHARSET,
                                       chrset_res.group(3))
            else:
                # change iso-8859-* into us-ascii
                line = '%sus-ascii%s' % chrset_res.group(1, 3)
        if has_cte and cte.match(line):
            # Rewrite the header to say how the body is written below.
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    if (must_quote_header or must_quote_body) and not is_mime:
        # Something gets encoded in a message that was not MIME yet:
        # add the headers that make it one.
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    # Handle the parts of a multipart message, starting from the
    # boundary line that ended the body read above.
    line = message_end
    while multipart:
        if line == multipart + '--\n':
            # read bit after the end of the last part
            while 1:
                line = ifile.readline()
                if not line:
                    return
                if must_quote_body:
                    line = mime_encode(line, 0)
                ofile.write(line)
        if line == multipart + '\n':
            # Process the next part recursively; it stops at the next
            # boundary line, which is then available as nifile.peek.
            nifile = File(ifile, multipart)
            mimify_part(nifile, ofile, 1)
            line = nifile.peek
            if not line:
                # premature end of file
                break
            ofile.write(line)
            continue
        # unexpectedly no multipart separator--copy rest of file
        while 1:
            line = ifile.readline()
            if not line:
                return
            if must_quote_body:
                line = mime_encode(line, 0)
            ofile.write(line)
def mimify(infile, outfile):
    """Convert 8bit parts of a MIME mail message to quoted-printable.

    infile and outfile may each be a file name or an open file object.
    Only readline() is used on the input and only write() and flush()
    on the output.  If both are the same file name, the original file
    is first renamed to ',' + its base name in the same directory and a
    new file is written under the original name.

    Files opened here from a name are closed before returning, also
    when an error occurs; file objects passed in by the caller are
    flushed (the output) but never closed.
    """
    opened = []
    try:
        if isinstance(infile, str):
            ifile = open(infile)
            opened.append(ifile)
            if isinstance(outfile, str) and infile == outfile:
                import os
                d, f = os.path.split(infile)
                # Keep the original under a ',' name; ifile stays open on it.
                os.rename(infile, os.path.join(d, ',' + f))
        else:
            ifile = infile
        if isinstance(outfile, str):
            ofile = open(outfile, 'w')
            opened.append(ofile)
        else:
            ofile = outfile
        nifile = File(ifile, None)
        mimify_part(nifile, ofile, 0)
        ofile.flush()
    finally:
        for fileobj in opened:
            fileobj.close()
427 import sys
# Command-line interface:
#   -e        encode (mimify)
#   -d        decode (unmimify)
#   -b        also decode base64 parts (only valid together with -d)
#   -l len    set MAXLEN, the line length above which a body is encoded
# Exactly one of -e and -d must be given.  With no file arguments the
# program reads standard input and writes standard output; with one
# argument it reads that file and writes standard output.
if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
    import getopt
    usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'

    decode_base64 = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
    except getopt.error:
        # Unknown option or missing option argument.
        print(usage)
        sys.exit(1)
    if len(args) not in (0, 1, 2):
        print(usage)
        sys.exit(1)
    if (('-e', '') in opts) == (('-d', '') in opts) or \
       ((('-b', '') in opts) and (('-d', '') not in opts)):
        print(usage)
        sys.exit(1)
    for o, a in opts:
        if o == '-e':
            encode = mimify
        elif o == '-d':
            encode = unmimify
        elif o == '-l':
            try:
                MAXLEN = int(a)
            except ValueError:
                # Only a malformed number is a usage error here.
                print(usage)
                sys.exit(1)
        elif o == '-b':
            decode_base64 = 1
    if len(args) == 0:
        encode_args = (sys.stdin, sys.stdout)
    elif len(args) == 1:
        encode_args = (args[0], sys.stdout)
    else:
        encode_args = (args[0], args[1])
    if decode_base64:
        # Only unmimify() takes this third argument; -b requires -d.
        encode_args = encode_args + (decode_base64,)
    encode(*encode_args)