# Copyright (C) 2002 Python Software Foundation
# Author: che@debian.org (Ben Gertzfield)

"""Header encoding and decoding functionality."""
import re
import email.quopriMIME
import email.base64MIME
from email.Charset import Charset

try:
    from email._compat22 import _floordiv
except SyntaxError:
    # Python 2.1 spells integer division differently
    from email._compat21 import _floordiv
# Match encoded-word strings in the form =?charset?q?Hello_World?=
#
# The literal delimiters are part of the pattern: without them the three
# groups would match any "q" or "b" found anywhere in a header.  Because the
# pattern has three capturing groups, ecre.split() returns a flat list of the
# form [unencoded, charset, encoding, encoded, unencoded, ...].
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
# Helper shared with the quoted-printable module; it is called below as
# _max_append(chunks, text, maxlinelen, ' ') to add text to a list of
# output lines.
_max_append = email.quopriMIME._max_append
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (decoded_string, charset) pairs containing each of the
    decoded parts of the header.  Charset is None for non-encoded parts of the
    header, otherwise a lower-case string containing the name of the character
    set specified in the encoded string.

    Adjacent parts that share a charset are merged into a single pair, and
    unencoded text that continues a previous unencoded part is joined to it
    with a single space.
    """
    # If no encoding, just return the header
    header = str(header)
    if not ecre.search(header):
        return [(header, None)]

    decoded = []
    for line in header.splitlines():
        # This line might not have an encoding in it
        if not ecre.search(line):
            decoded.append((line, None))
            continue

        # ecre has three groups, so split() yields
        # [unencoded, charset, encoding, encoded, unencoded, ...]
        parts = ecre.split(line)
        while parts:
            unenc = parts.pop(0).strip()
            if unenc:
                # Should we continue a long line?
                if decoded and decoded[-1][1] is None:
                    # Extend the previous unencoded part with this text.
                    # (Appending the last decoded word here instead would
                    # silently drop the unencoded text.)
                    decoded[-1] = (decoded[-1][0] + ' ' + unenc, None)
                else:
                    decoded.append((unenc, None))
            if parts:
                charset, encoding = [s.lower() for s in parts[0:2]]
                encoded = parts[2]
                if encoding == 'q':
                    dec = email.quopriMIME.header_decode(encoded)
                elif encoding == 'b':
                    dec = email.base64MIME.decode(encoded)
                else:
                    # ecre only admits q/b; pass anything else through as is
                    dec = encoded

                # Merge runs of encoded words that use the same charset
                if decoded and decoded[-1][1] == charset:
                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
                else:
                    decoded.append((dec, charset))
            # Drop the charset/encoding/encoded triple just consumed
            del parts[0:3]
    return decoded
# Defaults used by Header.  Neither name is defined in the visible part of
# this file; the values follow the documented default line length (76) and
# the "newline + space" continuation format shown in encode_chunks().
MAXLINELEN = 76
NLSPACE = '\n '


class Header:
    def __init__(self, s, charset=None, maxlinelen=None, header_name=None):
        """Create a MIME-compliant header that can contain many languages.

        Specify the initial header value in s.  Specify its character set as a
        Charset object in the charset argument.  If none, a default Charset
        instance will be used.

        You can later append to the header with append(s, charset) below;
        charset does not have to be the same as the one initially specified
        here.  In fact, it's optional, and if not given, defaults to the
        charset specified in the constructor.

        The maximum line length can be specified explicitly via maxlinelen.
        You can also pass None for maxlinelen and the name of a header field
        (e.g. "Subject") to let the constructor guess the best line length to
        use.  The default maxlinelen is 76.
        """
        if charset is None:
            charset = Charset()
        self._charset = charset
        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
        self._chunks = []
        self.append(s, charset)
        if maxlinelen is None:
            if header_name is None:
                self._maxlinelen = MAXLINELEN
            else:
                self.guess_maxlinelen(header_name)
        else:
            self._maxlinelen = maxlinelen

    def __str__(self):
        """A synonym for self.encode()."""
        return self.encode()

    def guess_maxlinelen(self, s=None):
        """Guess the maximum length to make each header line.

        Given a header name (e.g. "Subject"), set this header's maximum line
        length to an appropriate length to avoid line wrapping.  If s is not
        given, return the previous maximum line length and don't set it.

        Returns the new maximum line length.
        """
        # BAW: is this semantic necessary?
        if s is not None:
            # Leave room for the header name plus the ": " separator
            self._maxlinelen = MAXLINELEN - len(s) - 2
        return self._maxlinelen

    def append(self, s, charset=None):
        """Append string s with Charset charset to the MIME header.

        charset defaults to the one given in the class constructor.
        """
        if charset is None:
            charset = self._charset
        self._chunks.append((s, charset))

    def _split(self, s, charset):
        # Split up a header safely for use with encode_chunks.  BAW: this
        # appears to be a private convenience method.
        #
        # Returns a list of (string, charset) pairs, each of which encodes to
        # at most self._maxlinelen characters.
        splittable = charset.to_splittable(s)
        encoded = charset.from_splittable(splittable)
        elen = charset.encoded_header_len(encoded)

        if elen <= self._maxlinelen:
            return [(encoded, charset)]
        # BAW: should we use encoded?
        elif elen == len(s):
            # We can split on _maxlinelen boundaries because we know that the
            # encoding won't change the size of the string
            splitpnt = self._maxlinelen
            first = charset.from_splittable(splittable[:splitpnt], 0)
            last = charset.from_splittable(splittable[splitpnt:], 0)
            return self._split(first, charset) + self._split(last, charset)
        else:
            # Divide and conquer.
            halfway = _floordiv(len(splittable), 2)
            first = charset.from_splittable(splittable[:halfway], 0)
            last = charset.from_splittable(splittable[halfway:], 0)
            return self._split(first, charset) + self._split(last, charset)

    def encode(self):
        """Encode a message header, possibly converting charset and encoding.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.
        """
        # Note: the stored chunks are replaced by their split form.
        newchunks = []
        for s, charset in self._chunks:
            newchunks += self._split(s, charset)
        self._chunks = newchunks
        return self.encode_chunks()

    def encode_chunks(self):
        """MIME-encode a header with many different charsets and/or encodings.

        Given a list of pairs (string, charset), return a MIME-encoded string
        suitable for use in a header field.  Each pair may have different
        charsets and/or encodings, and the resulting header will accurately
        reflect each setting.

        Each encoding can be email.Utils.QP (quoted-printable, for ASCII-like
        character sets like iso-8859-1), email.Utils.BASE64 (Base64, for
        non-ASCII like character sets like KOI8-R and iso-2022-jp), or None
        (no encoding).

        Each pair will be represented on a separate line; the resulting string
        will be in the format:

        "=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\\n
          =?charset2?b?SvxyZ2VuIEL2aW5n?="
        """
        chunks = []
        for header, charset in self._chunks:
            if charset is None:
                # No charset: emit the text as is
                _max_append(chunks, header, self._maxlinelen, ' ')
            else:
                _max_append(chunks, charset.header_encode(header, 0),
                            self._maxlinelen, ' ')
        return NLSPACE.join(chunks)