/*
 *  Copyright 2004 the mime4j project
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
16 package org
.mime4j
.decoder
;
18 import java
.io
.ByteArrayInputStream
;
19 import java
.io
.ByteArrayOutputStream
;
20 import java
.io
.IOException
;
21 import java
.io
.UnsupportedEncodingException
;
23 import org
.apache
.commons
.logging
.Log
;
24 import org
.apache
.commons
.logging
.LogFactory
;
25 import org
.mime4j
.util
.CharsetUtil
;
28 * Static methods for decoding strings, byte arrays and encoded words.
30 * @author Niklas Therning
31 * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
33 public class DecoderUtil
{
34 private static Log log
= LogFactory
.getLog(DecoderUtil
.class);
37 * Decodes a string containing quoted-printable encoded data.
39 * @param s the string to decode.
40 * @return the decoded bytes.
42 public static byte[] decodeBaseQuotedPrintable(String s
) {
43 ByteArrayOutputStream baos
= new ByteArrayOutputStream();
46 byte[] bytes
= s
.getBytes("US-ASCII");
48 QuotedPrintableInputStream is
= new QuotedPrintableInputStream(
49 new ByteArrayInputStream(bytes
));
52 while ((b
= is
.read()) != -1) {
55 } catch (IOException e
) {
57 * This should never happen!
62 return baos
.toByteArray();
66 * Decodes a string containing base64 encoded data.
68 * @param s the string to decode.
69 * @return the decoded bytes.
71 public static byte[] decodeBase64(String s
) {
72 ByteArrayOutputStream baos
= new ByteArrayOutputStream();
75 byte[] bytes
= s
.getBytes("US-ASCII");
77 Base64InputStream is
= new Base64InputStream(
78 new ByteArrayInputStream(bytes
));
81 while ((b
= is
.read()) != -1) {
84 } catch (IOException e
) {
86 * This should never happen!
91 return baos
.toByteArray();
95 * Decodes an encoded word encoded with the 'B' encoding (described in
96 * RFC 2047) found in a header field body.
98 * @param encodedWord the encoded word to decode.
99 * @param charset the Java charset to use.
100 * @return the decoded string.
101 * @throws UnsupportedEncodingException if the given Java charset isn't
104 public static String
decodeB(String encodedWord
, String charset
)
105 throws UnsupportedEncodingException
{
107 return new String(decodeBase64(encodedWord
), charset
);
111 * Decodes an encoded word encoded with the 'Q' encoding (described in
112 * RFC 2047) found in a header field body.
114 * @param encodedWord the encoded word to decode.
115 * @param charset the Java charset to use.
116 * @return the decoded string.
117 * @throws UnsupportedEncodingException if the given Java charset isn't
120 public static String
decodeQ(String encodedWord
, String charset
)
121 throws UnsupportedEncodingException
{
126 StringBuffer sb
= new StringBuffer();
127 for (int i
= 0; i
< encodedWord
.length(); i
++) {
128 char c
= encodedWord
.charAt(i
);
136 return new String(decodeBaseQuotedPrintable(sb
.toString()), charset
);
140 * Decodes a string containing encoded words as defined by RFC 2047.
141 * Encoded words in have the form
142 * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
143 * quoted-printable and 'B' or 'b' for Base64.
145 * @param body the string to decode.
146 * @return the decoded string.
148 public static String
decodeEncodedWords(String body
) {
149 StringBuffer sb
= new StringBuffer();
157 * Encoded words in headers have the form
158 * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
159 * quoted printable and 'B' and 'b' for Base64
162 while (p2
< body
.length()) {
164 * Find beginning of first encoded word
166 p1
= body
.indexOf("=?", p2
);
169 * None found. Emit the rest of the header and exit.
171 sb
.append(body
.substring(p2
));
176 * p2 points to the previously found end marker or the start
177 * of the entire header text. Append the text between that
178 * marker and the one pointed to by p1.
181 sb
.append(body
.substring(p2
, p1
));
185 * Find the first and second '?':s after the marker pointed to
188 int t1
= body
.indexOf('?', p1
+ 2);
189 int t2
= t1
!= -1 ? body
.indexOf('?', t1
+ 1) : -1;
192 * Find this words end marker.
194 p2
= t2
!= -1 ? body
.indexOf("?=", t2
+ 1) : -1;
196 if (t2
!= -1 && body
.charAt(t2
+ 1) == '=') {
198 * The text "=?charset?enc?=" appears to be valid for
204 * No end marker was found. Append the rest of the
207 sb
.append(body
.substring(p1
));
213 * [p1+2, t1] -> charset
214 * [t1+1, t2] -> encoding
215 * [t2+1, p2] -> encoded word
218 String decodedWord
= null;
226 String mimeCharset
= body
.substring(p1
+ 2, t1
);
227 String enc
= body
.substring(t1
+ 1, t2
);
228 String encodedWord
= body
.substring(t2
+ 1, p2
);
231 * Convert the MIME charset to a corresponding Java one.
233 String charset
= CharsetUtil
.toJavaCharset(mimeCharset
);
234 if (charset
== null) {
235 decodedWord
= body
.substring(p1
, p2
+ 2);
236 if (log
.isWarnEnabled()) {
237 log
.warn("MIME charset '" + mimeCharset
238 + "' in header field doesn't have a "
239 +"corresponding Java charset");
241 } else if (!CharsetUtil
.isDecodingSupported(charset
)) {
242 decodedWord
= body
.substring(p1
, p2
+ 2);
243 if (log
.isWarnEnabled()) {
244 log
.warn("Current JDK doesn't support decoding "
245 + "of charset '" + charset
246 + "' (MIME charset '"
247 + mimeCharset
+ "')");
250 if (enc
.equalsIgnoreCase("Q")) {
251 decodedWord
= DecoderUtil
.decodeQ(encodedWord
, charset
);
252 } else if (enc
.equalsIgnoreCase("B")) {
253 decodedWord
= DecoderUtil
.decodeB(encodedWord
, charset
);
255 decodedWord
= encodedWord
;
256 if (log
.isWarnEnabled()) {
257 log
.warn("Warning: Unknown encoding in "
258 + "header field '" + enc
+ "'");
264 sb
.append(decodedWord
);
266 } catch (Throwable t
) {
267 log
.error("Decoding header field body '" + body
+ "'", t
);
270 return sb
.toString();