1 /****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one *
3 * or more contributor license agreements. See the NOTICE file *
4 * distributed with this work for additional information *
5 * regarding copyright ownership. The ASF licenses this file *
6 * to you under the Apache License, Version 2.0 (the *
7 * "License"); you may not use this file except in compliance *
8 * with the License. You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
12 * Unless required by applicable law or agreed to in writing, *
13 * software distributed under the License is distributed on an *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
15 * KIND, either express or implied. See the License for the *
16 * specific language governing permissions and limitations *
17 * under the License. *
18 ****************************************************************/
20 package org
.apache
.james
.mime4j
;
22 import java
.io
.IOException
;
23 import java
.io
.InputStream
;
27 * Parses MIME (or RFC822) message streams of bytes or characters and reports
28 * parsing events to a <code>ContentHandler</code> instance.
33 * ContentHandler handler = new MyHandler();
34 * MimeStreamParser parser = new MimeStreamParser();
35 * parser.setContentHandler(handler);
36 * parser.parse(new FileInputStream("mime.msg"));
39 * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $
41 public class MimeStreamParser
{
43 private ContentHandler handler
= null;
44 private boolean contentDecoding
;
45 private final MimeTokenStream mimeTokenStream
= new MimeTokenStream();
// Creates a parser with automatic body-content decoding switched off.
// NOTE(review): the embedded listing numbers jump from 47 to 49 and the closing
// brace is absent, so at least one original line of this constructor is not
// visible here -- confirm against the real source before editing.
47 public MimeStreamParser() {
49 this.contentDecoding
= false;
53 * Determines whether this parser automatically decodes body content
54 * based on the on the MIME fields with the standard defaults.
56 public boolean isContentDecoding() {
57 return contentDecoding
;
61 * Defines whether parser should automatically decode body content
62 * based on the on the MIME fields with the standard defaults.
64 public void setContentDecoding(boolean b
) {
65 this.contentDecoding
= b
;
69 * Parses a stream of bytes containing a MIME message.
71 * @param is the stream to parse.
72 * @throws MimeException if the message can not be processed
73 * @throws IOException on I/O errors.
// Feeds the given stream to mimeTokenStream and forwards each token state to
// the matching ContentHandler callback.
// NOTE(review): this listing is lossy -- the loop header, the switch header,
// the else line, the break statements, the default label and all closing
// braces are missing (the embedded listing numbers skip them). The comments
// below describe only what is visible.
// NOTE(review): handler is initialised to null and is dereferenced here
// without a check -- presumably a handler must be installed first; confirm.
75 public void parse(InputStream is
) throws MimeException
, IOException
{
// Hand the input to the token stream before reading any state.
76 mimeTokenStream
.parse(is
);
// Current token state; it selects one of the case labels below.
78 int state
= mimeTokenStream
.getState();
// T_BODY: pass the body descriptor and the body stream to the handler.
80 case MimeTokenStream
.T_BODY
:
81 BodyDescriptor desc
= mimeTokenStream
.getBodyDescriptor();
82 InputStream bodyContent
;
// With content decoding enabled the decoded stream is used ...
83 if (contentDecoding
) {
84 bodyContent
= mimeTokenStream
.getDecodedInputStream();
// ... otherwise the plain stream (presumably the else branch; its line is
// not visible in this listing).
86 bodyContent
= mimeTokenStream
.getInputStream();
88 handler
.body(desc
, bodyContent
);
90 case MimeTokenStream
.T_END_BODYPART
:
91 handler
.endBodyPart();
// T_END_HEADER: no statement for this case is visible in this listing.
93 case MimeTokenStream
.T_END_HEADER
:
// T_END_MESSAGE: no statement for this case is visible in this listing.
96 case MimeTokenStream
.T_END_MESSAGE
:
99 case MimeTokenStream
.T_END_MULTIPART
:
100 handler
.endMultipart();
// T_END_OF_STREAM: no statement for this case is visible in this listing;
// presumably this is where parsing terminates -- confirm.
102 case MimeTokenStream
.T_END_OF_STREAM
:
// T_EPILOGUE, T_PREAMBLE and T_RAW_ENTITY each pass the token stream's
// input stream to the corresponding handler callback.
104 case MimeTokenStream
.T_EPILOGUE
:
105 handler
.epilogue(mimeTokenStream
.getInputStream());
107 case MimeTokenStream
.T_FIELD
:
108 handler
.field(mimeTokenStream
.getField());
110 case MimeTokenStream
.T_PREAMBLE
:
111 handler
.preamble(mimeTokenStream
.getInputStream());
113 case MimeTokenStream
.T_RAW_ENTITY
:
114 handler
.raw(mimeTokenStream
.getInputStream());
116 case MimeTokenStream
.T_START_BODYPART
:
117 handler
.startBodyPart();
119 case MimeTokenStream
.T_START_HEADER
:
120 handler
.startHeader();
122 case MimeTokenStream
.T_START_MESSAGE
:
123 handler
.startMessage();
125 case MimeTokenStream
.T_START_MULTIPART
:
126 handler
.startMultipart(mimeTokenStream
.getBodyDescriptor());
// Presumably the default branch: a state matching none of the labels above
// is reported as a programming error.
129 throw new IllegalStateException("Invalid state: " + state
);
// Advance the token stream and store the state it returns.
131 state
= mimeTokenStream
.next();
136 * Determines if this parser is currently in raw mode.
138 * @return <code>true</code> if in raw mode, <code>false</code>
140 * @see #setRaw(boolean)
142 public boolean isRaw() {
143 return mimeTokenStream
.isRaw();
147 * Enables or disables raw mode. In raw mode all future entities
148 * (messages or body parts) in the stream will be reported to the
149 * {@link ContentHandler#raw(InputStream)} handler method only.
150 * The stream will contain the entire unparsed entity contents
151 * including header fields and whatever is in the body.
153 * @param raw <code>true</code> enables raw mode, <code>false</code>
156 public void setRaw(boolean raw
) {
157 mimeTokenStream
.setRecursionMode(MimeTokenStream
.M_RAW
);
161 * Finishes the parsing and stops reading lines.
162 * NOTE: No more lines will be parsed but the parser will still call
164 * {@link ContentHandler#endMultipart()},
165 * {@link ContentHandler#endBodyPart()},
166 * {@link ContentHandler#endMessage()}, etc to match previous calls to
168 * {@link ContentHandler#startMultipart(BodyDescriptor)},
169 * {@link ContentHandler#startBodyPart()},
170 * {@link ContentHandler#startMessage()}, etc.
// Delegates to the token stream's stop().
// NOTE(review): the declaration line of the enclosing method (listing line
// 172) and its closing brace are not visible in this listing.
173 mimeTokenStream
.stop();
177 * Sets the <code>ContentHandler</code> to use when reporting parsing events.
180 * @param h the <code>ContentHandler</code>.
182 public void setContentHandler(ContentHandler h
) {