Results of a rewrite pass
[python/dscho.git] / Lib / email / base64MIME.py
blob56e44e1c2c62d5b0d86e7e015123452463575cab
1 # Copyright (C) 2002 Python Software Foundation
2 # Author: che@debian.org (Ben Gertzfield)
4 """Base64 content transfer encoding per RFCs 2045-2047.
6 This module handles the content transfer encoding method defined in RFC 2045
7 to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
8 characters encoding known as Base64.
10 It is used in the MIME standards for email to attach images, audio, and text
11 using some 8-bit character sets to messages.
13 This module provides an interface to encode and decode both headers and bodies
14 with Base64 encoding.
16 RFC 2047 defines a method for including character set information in an
17 `encoded-word' in a header. This method is commonly used for 8-bit real names
18 in To:, From:, Cc:, etc. fields, as well as Subject: lines.
20 This module does not do the line wrapping or end-of-line character conversion
21 necessary for proper internationalized headers; it only does dumb encoding and
22 decoding. To deal with the various line wrapping issues, use the email.Header
23 module.
24 """
26 import re
27 from binascii import b2a_base64, a2b_base64
28 from email.Utils import fix_eols
30 try:
31 from email._compat22 import _floordiv
32 except SyntaxError:
33 # Python 2.1 spells integer division differently
34 from email._compat21 import _floordiv
# End-of-line sequences and the empty joiner used by the encoders below.
CRLF, NL, EMPTYSTRING = '\r\n', '\n', ''

# Count of the fixed characters in the '=?%s?b?%s?=' wrapper that
# header_encode() puts around the charset name and the base64 text:
# '=?' + '?b?' + '?='.
# See also Charset.py
MISC_LEN = 7
# Compatibility shim: if looking up the names True and False raises
# NameError, bind them to the integers 1 and 0 at module level so the rest
# of this module can use them unconditionally.
try:
    True, False
except NameError:
    True = 1
    False = 0
52 # Helpers
def base64_len(s):
    """Return the number of characters s occupies once base64-encoded.

    Base64 emits four output characters for every three input bytes; a
    trailing partial group of one or two bytes is padded out to a full four
    characters as well.
    """
    full_groups, remainder = divmod(len(s), 3)
    if remainder:
        # The incomplete final group still costs a whole 4-character unit.
        full_groups += 1
    return full_groups * 4
def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    The header is split into chunks and each chunk is emitted as an RFC 2047
    `encoded-word' using the Base64 ("b") encoding.  The encoded-words are
    joined with eol followed by a single space, so every line after the first
    starts with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
     =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each encoded-word being at most maxlinelen characters long (defaults
    to 76 characters).  The single space that introduces a continuation line
    is not counted against maxlinelen.

    Raises ValueError if maxlinelen is too small to hold the encoded-word
    wrapper for charset plus at least one base64 group.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Room left for base64 text once the RFC chrome ('=?', '?b?', '?=' and
    # the charset name) has been subtracted from the line budget.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Base64 turns every 3 input bytes into 4 characters and pads a partial
    # group up to a full 4 characters.  The chunk size therefore has to be a
    # whole number of 3-byte groups; otherwise the padded output can be
    # longer than max_encoded (e.g. 44 bytes -> 60 characters when only 59
    # are available).
    max_unencoded = _floordiv(max_encoded, 4) * 3
    if max_unencoded <= 0:
        # A zero step would make range() fail obscurely and a negative step
        # would silently drop the whole header, so fail explicitly instead.
        raise ValueError('maxlinelen=%s is too small for charset %r'
                         % (maxlinelen, charset))

    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])
        # b2a_base64() appends a newline; it is not part of the encoded-word.
        if chunk.endswith(NL):
            chunk = chunk[:-1]
        # Add the RFC chrome
        lines.append('=?%s?b?%s?=' % (charset, chunk))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty (false) s is returned unchanged.  Raises ValueError if
    maxlinelen is smaller than one base64 group (4 characters).
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Base64 turns every 3 input bytes into 4 characters and pads a partial
    # group up to 4 characters, so the per-line input size must be a whole
    # number of 3-byte groups for the output line to stay within maxlinelen.
    max_unencoded = _floordiv(maxlinelen, 4) * 3
    if max_unencoded <= 0:
        # Avoid the obscure "range() step must not be zero" failure.
        raise ValueError('maxlinelen=%s is too small to hold any base64 data'
                         % (maxlinelen,))

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        # Swap the newline added by b2a_base64() for the requested eol.
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
# Alternate names for encode(), kept for convenience and for backwards
# compatibility with the standard base64 module.
body_encode = encodestring = encode
def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    When convert_eols is given a (non-empty) string value, every canonical
    email linefeed, i.e. "\\r\\n", in the decoded text is replaced by that
    value.  os.linesep is a good choice for convert_eols if you are decoding
    a text attachment.

    An empty (false) s is returned unchanged.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.
    """
    if s:
        decoded = a2b_base64(s)
        if not convert_eols:
            return decoded
        return decoded.replace(CRLF, convert_eols)
    # Nothing to decode: hand the empty input straight back.
    return s
# Alternate names for decode(), kept for convenience and for backwards
# compatibility with the standard base64 module.
body_decode = decodestring = decode