Fixed bug causing OutOfMemoryError when decoding strings which contain =? but are...
[mime4j.git] / src / java / org / mime4j / decoder / DecoderUtil.java
blobf585f114469cb0392385e8ea7565c96cc1eae780
/*
 *  Copyright 2004 the mime4j project
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
16 package org.mime4j.decoder;
18 import java.io.ByteArrayInputStream;
19 import java.io.ByteArrayOutputStream;
20 import java.io.IOException;
21 import java.io.UnsupportedEncodingException;
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.mime4j.util.CharsetUtil;
27 /**
28 * Static methods for decoding strings, byte arrays and encoded words.
30 * @author Niklas Therning
31 * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
33 public class DecoderUtil {
34 private static Log log = LogFactory.getLog(DecoderUtil.class);
36 /**
37 * Decodes a string containing quoted-printable encoded data.
39 * @param s the string to decode.
40 * @return the decoded bytes.
42 public static byte[] decodeBaseQuotedPrintable(String s) {
43 ByteArrayOutputStream baos = new ByteArrayOutputStream();
45 try {
46 byte[] bytes = s.getBytes("US-ASCII");
48 QuotedPrintableInputStream is = new QuotedPrintableInputStream(
49 new ByteArrayInputStream(bytes));
51 int b = 0;
52 while ((b = is.read()) != -1) {
53 baos.write(b);
55 } catch (IOException e) {
57 * This should never happen!
59 log.error(e);
62 return baos.toByteArray();
65 /**
66 * Decodes a string containing base64 encoded data.
68 * @param s the string to decode.
69 * @return the decoded bytes.
71 public static byte[] decodeBase64(String s) {
72 ByteArrayOutputStream baos = new ByteArrayOutputStream();
74 try {
75 byte[] bytes = s.getBytes("US-ASCII");
77 Base64InputStream is = new Base64InputStream(
78 new ByteArrayInputStream(bytes));
80 int b = 0;
81 while ((b = is.read()) != -1) {
82 baos.write(b);
84 } catch (IOException e) {
86 * This should never happen!
88 log.error(e);
91 return baos.toByteArray();
94 /**
95 * Decodes an encoded word encoded with the 'B' encoding (described in
96 * RFC 2047) found in a header field body.
98 * @param encodedWord the encoded word to decode.
99 * @param charset the Java charset to use.
100 * @return the decoded string.
101 * @throws UnsupportedEncodingException if the given Java charset isn't
102 * supported.
104 public static String decodeB(String encodedWord, String charset)
105 throws UnsupportedEncodingException {
107 return new String(decodeBase64(encodedWord), charset);
111 * Decodes an encoded word encoded with the 'Q' encoding (described in
112 * RFC 2047) found in a header field body.
114 * @param encodedWord the encoded word to decode.
115 * @param charset the Java charset to use.
116 * @return the decoded string.
117 * @throws UnsupportedEncodingException if the given Java charset isn't
118 * supported.
120 public static String decodeQ(String encodedWord, String charset)
121 throws UnsupportedEncodingException {
124 * Replace _ with =20
126 StringBuffer sb = new StringBuffer();
127 for (int i = 0; i < encodedWord.length(); i++) {
128 char c = encodedWord.charAt(i);
129 if (c == '_') {
130 sb.append("=20");
131 } else {
132 sb.append(c);
136 return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
140 * Decodes a string containing encoded words as defined by RFC 2047.
141 * Encoded words in have the form
142 * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
143 * quoted-printable and 'B' or 'b' for Base64.
145 * @param body the string to decode.
146 * @return the decoded string.
148 public static String decodeEncodedWords(String body) {
149 StringBuffer sb = new StringBuffer();
151 int p1 = 0;
152 int p2 = 0;
154 try {
157 * Encoded words in headers have the form
158 * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
159 * quoted printable and 'B' and 'b' for Base64
162 while (p2 < body.length()) {
164 * Find beginning of first encoded word
166 p1 = body.indexOf("=?", p2);
167 if (p1 == -1) {
169 * None found. Emit the rest of the header and exit.
171 sb.append(body.substring(p2));
172 break;
176 * p2 points to the previously found end marker or the start
177 * of the entire header text. Append the text between that
178 * marker and the one pointed to by p1.
180 if (p1 - p2 > 0) {
181 sb.append(body.substring(p2, p1));
185 * Find the first and second '?':s after the marker pointed to
186 * by p1.
188 int t1 = body.indexOf('?', p1 + 2);
189 int t2 = t1 != -1 ? body.indexOf('?', t1 + 1) : -1;
192 * Find this words end marker.
194 p2 = t2 != -1 ? body.indexOf("?=", t2 + 1) : -1;
195 if (p2 == -1) {
196 if (t2 != -1 && body.charAt(t2 + 1) == '=') {
198 * The text "=?charset?enc?=" appears to be valid for
199 * empty strings.
201 p2 = t2;
202 } else {
204 * No end marker was found. Append the rest of the
205 * header and exit.
207 sb.append(body.substring(p1));
208 break;
213 * [p1+2, t1] -> charset
214 * [t1+1, t2] -> encoding
215 * [t2+1, p2] -> encoded word
218 String decodedWord = null;
219 if (t2 == p2) {
221 * The text is empty
223 decodedWord = "";
224 } else {
226 String mimeCharset = body.substring(p1 + 2, t1);
227 String enc = body.substring(t1 + 1, t2);
228 String encodedWord = body.substring(t2 + 1, p2);
231 * Convert the MIME charset to a corresponding Java one.
233 String charset = CharsetUtil.toJavaCharset(mimeCharset);
234 if (charset == null) {
235 decodedWord = body.substring(p1, p2 + 2);
236 if (log.isWarnEnabled()) {
237 log.warn("MIME charset '" + mimeCharset
238 + "' in header field doesn't have a "
239 +"corresponding Java charset");
241 } else if (!CharsetUtil.isDecodingSupported(charset)) {
242 decodedWord = body.substring(p1, p2 + 2);
243 if (log.isWarnEnabled()) {
244 log.warn("Current JDK doesn't support decoding "
245 + "of charset '" + charset
246 + "' (MIME charset '"
247 + mimeCharset + "')");
249 } else {
250 if (enc.equalsIgnoreCase("Q")) {
251 decodedWord = DecoderUtil.decodeQ(encodedWord, charset);
252 } else if (enc.equalsIgnoreCase("B")) {
253 decodedWord = DecoderUtil.decodeB(encodedWord, charset);
254 } else {
255 decodedWord = encodedWord;
256 if (log.isWarnEnabled()) {
257 log.warn("Warning: Unknown encoding in "
258 + "header field '" + enc + "'");
263 p2 += 2;
264 sb.append(decodedWord);
266 } catch (Throwable t) {
267 log.error("Decoding header field body '" + body + "'", t);
270 return sb.toString();