1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 #include "nsConverterInputStream.h"
39 #include "nsIInputStream.h"
40 #include "nsICharsetConverterManager.h"
41 #include "nsIServiceManager.h"
// Default buffer size: Init() substitutes this value when the caller passes
// aBufferSize <= 0, and then uses it for both the byte buffer and the
// unichar buffer.
43 #define CONVERTER_BUFFER_SIZE 8192
// Lists the three interfaces this class exposes (nsIConverterInputStream,
// nsIUnicharInputStream, nsIUnicharLineInputStream).
// NOTE(review): presumably this macro generates the nsISupports boilerplate
// for them -- confirm against the XPCOM headers.
45 NS_IMPL_ISUPPORTS3(nsConverterInputStream
, nsIConverterInputStream
,
46 nsIUnicharInputStream
, nsIUnicharLineInputStream
)
// File-local CID constant; Init() passes it to do_GetService() to obtain the
// charset converter manager.
48 static NS_DEFINE_CID(kCharsetConverterManagerCID
, NS_ICHARSETCONVERTERMANAGER_CID
);
// Init: obtains a decoder for the requested charset and allocates the byte
// and unichar buffers used for conversion, then records the replacement
// character. Every failing step returns its nsresult immediately.
// NOTE(review): parts of the signature (aCharset, aBufferSize), the
// declaration of rv, and the end of the body are not visible in this
// listing -- confirm against the full file.
51 nsConverterInputStream::Init(nsIInputStream
* aStream
,
54 PRUnichar aReplacementChar
)
// A non-positive buffer size means "use the default".
61 if (aBufferSize
<=0) aBufferSize
=CONVERTER_BUFFER_SIZE
;
// Look up the charset converter manager service; rv receives the status.
64 nsCOMPtr
<nsICharsetConverterManager
> ccm
=
65 do_GetService(kCharsetConverterManagerCID
, &rv
);
66 if (NS_FAILED(rv
)) return rv
;
// Request a decoder for aCharset, falling back to "ISO-8859-1" when
// aCharset is null. The decoder is stored in mConverter.
68 rv
= ccm
->GetUnicodeDecoder(aCharset
? aCharset
: "ISO-8859-1", getter_AddRefs(mConverter
));
69 if (NS_FAILED(rv
)) return rv
;
// Create the byte buffer (mByteData) with the chosen size.
72 rv
= NS_NewByteBuffer(getter_AddRefs(mByteData
), nsnull
, aBufferSize
);
73 if (NS_FAILED(rv
)) return rv
;
// Create the unichar buffer (mUnicharData) with the same size.
75 rv
= NS_NewUnicharBuffer(getter_AddRefs(mUnicharData
), nsnull
, aBufferSize
);
76 if (NS_FAILED(rv
)) return rv
;
// Remember the replacement character. Fill() only substitutes it after a
// failed conversion when it is non-zero.
79 mReplacementChar
= aReplacementChar
;
// Close: closes the underlying input stream (if there is one), frees the
// line buffer and clears the unichar buffer pointer.
// NOTE(review): the lines between these statements and the final return are
// not visible in this listing; presumably rv is what gets returned -- confirm.
85 nsConverterInputStream::Close()
// mInput may be null; in that case there is nothing to close and rv is NS_OK.
87 nsresult rv
= mInput
? mInput
->Close() : NS_OK
;
// Release the line buffer that ReadLine() initializes.
88 PR_FREEIF(mLineBuffer
);
// Clear mUnicharData so the buffer is no longer referenced from here.
92 mUnicharData
= nsnull
;
// Read: copies already-converted characters from mUnicharData into aBuf and
// reports the number copied through *aReadCount. When no unread characters
// remain it first calls Fill() to convert more input.
// NOTE(review): the remaining parameters and several body lines (including
// the body of the "readCount > aCount" branch) are not visible in this
// listing; presumably that branch clamps readCount to aCount -- confirm.
97 nsConverterInputStream::Read(PRUnichar
* aBuf
,
// The difference below is stored in a PRUint32, so the offset must never
// exceed the length.
101 NS_ASSERTION(mUnicharDataLength
>= mUnicharDataOffset
, "unsigned madness");
// Number of converted characters not yet handed out.
102 PRUint32 readCount
= mUnicharDataLength
- mUnicharDataOffset
;
103 if (0 == readCount
) {
104 // Fill the unichar buffer
// Fill() stores its status in mLastErrorCode and returns the new length.
105 readCount
= Fill(&mLastErrorCode
);
// Nothing could be produced: hand the stored status back to the caller.
106 if (readCount
== 0) {
108 return mLastErrorCode
;
111 if (readCount
> aCount
) {
// readCount counts PRUnichars, hence the sizeof() scaling for memcpy.
114 memcpy(aBuf
, mUnicharData
->GetBuffer() + mUnicharDataOffset
,
115 readCount
* sizeof(PRUnichar
));
// Mark the copied characters as consumed and report the count.
116 mUnicharDataOffset
+= readCount
;
117 *aReadCount
= readCount
;
// ReadSegments: hands the unread part of mUnicharData to aWriter, possibly in
// several calls, and reports the total amount aWriter accepted through
// *aReadCount. When no unread data remains it first calls Fill().
// NOTE(review): the aClosure parameter, the declaration of rv and several
// body lines (the empty-buffer branch body, the handling of a failing
// aWriter, the closing braces and the return) are not visible in this
// listing -- confirm against the full file.
122 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter
,
124 PRUint32 aCount
, PRUint32
*aReadCount
)
126 NS_ASSERTION(mUnicharDataLength
>= mUnicharDataOffset
, "unsigned madness");
// NOTE(review): despite the "bytes" in the names, these values come from
// mUnicharDataLength - mUnicharDataOffset, which Read() treats as a count of
// PRUnichars (it multiplies by sizeof(PRUnichar) before memcpy).
127 PRUint32 bytesToWrite
= mUnicharDataLength
- mUnicharDataOffset
;
129 if (0 == bytesToWrite
) {
130 // Fill the unichar buffer
131 bytesToWrite
= Fill(&rv
);
// bytesToWrite is a PRUint32, so "<= 0" is equivalent to "== 0" here.
132 if (bytesToWrite
<= 0) {
// Never offer the writer more than the caller asked for.
138 if (bytesToWrite
> aCount
)
139 bytesToWrite
= aCount
;
141 PRUint32 bytesWritten
;
142 PRUint32 totalBytesWritten
= 0;
// Keep calling the writer until everything on offer has been taken.
144 while (bytesToWrite
) {
// Arguments: this stream, the caller's closure, a pointer to the first unread
// character, the running total already written, the amount still on offer,
// and an out-parameter receiving how much the writer took.
145 rv
= aWriter(this, aClosure
,
146 mUnicharData
->GetBuffer() + mUnicharDataOffset
,
147 totalBytesWritten
, bytesToWrite
, &bytesWritten
);
149 // don't propagate errors to the caller
// Account for what the writer consumed in this round.
153 bytesToWrite
-= bytesWritten
;
154 totalBytesWritten
+= bytesWritten
;
155 mUnicharDataOffset
+= bytesWritten
;
159 *aReadCount
= totalBytesWritten
;
// ReadString: assigns already-converted characters from mUnicharData to
// aString and reports the number assigned through *aReadCount. When no
// unread characters remain it first calls Fill().
// NOTE(review): several body lines are not visible in this listing (the body
// of the "readCount > aCount" branch, the tail of the reinterpret_cast
// expression, the closing braces and the return); presumably the branch
// clamps readCount to aCount and the pointer is offset by
// mUnicharDataOffset -- confirm.
165 nsConverterInputStream::ReadString(PRUint32 aCount
, nsAString
& aString
,
166 PRUint32
* aReadCount
)
// The difference below is stored in a PRUint32, so the offset must never
// exceed the length.
168 NS_ASSERTION(mUnicharDataLength
>= mUnicharDataOffset
, "unsigned madness");
// Number of converted characters not yet handed out.
169 PRUint32 readCount
= mUnicharDataLength
- mUnicharDataOffset
;
170 if (0 == readCount
) {
171 // Fill the unichar buffer
// Fill() stores its status in mLastErrorCode and returns the new length.
172 readCount
= Fill(&mLastErrorCode
);
// Nothing could be produced: hand the stored status back to the caller.
173 if (readCount
== 0) {
175 return mLastErrorCode
;
178 if (readCount
> aCount
) {
// Pointer into the unichar buffer from which the characters are taken.
181 const PRUnichar
* buf
= reinterpret_cast<const PRUnichar
*>(mUnicharData
->GetBuffer() +
// Replace aString's contents with readCount characters starting at buf.
183 aString
.Assign(buf
, readCount
);
// Mark the characters as consumed and report the count.
184 mUnicharDataOffset
+= readCount
;
185 *aReadCount
= readCount
;
// Fill: reads more bytes from mInput into mByteData, converts them with
// mConverter into mUnicharData, and returns the resulting
// mUnicharDataLength. The status of the operation is written to *aErrorCode.
// NOTE(review): several lines are not visible in this listing (the return
// statements of the early-exit branches, the "do {" that opens the
// conversion loop, and the source/destination length arguments of
// Convert()) -- confirm against the full file.
190 nsConverterInputStream::Fill(nsresult
* aErrorCode
)
192 if (nsnull
== mInput
) {
193 // We already closed the stream!
194 *aErrorCode
= NS_BASE_STREAM_CLOSED
;
198 if (NS_FAILED(mLastErrorCode
)) {
199 // We failed to completely convert last time, and error-recovery
200 // is disabled. We will fare no better this time, so...
201 *aErrorCode
= mLastErrorCode
;
205 // We assume a many to one conversion and are using equal sizes for
206 // the two buffers. However if an error happens at the very start
207 // of a byte buffer we may end up in a situation where n bytes lead
208 // to n+1 unicode chars. Thus we need to keep track of the leftover
209 // bytes as we convert.
// Read from mInput into the byte buffer. mLeftOverBytes is passed along; the
// assertion further down expects the buffer to then hold
// nb + mLeftOverBytes bytes.
211 PRInt32 nb
= mByteData
->Fill(aErrorCode
, mInput
, mLeftOverBytes
);
// Debug dump of the byte buffer; the "&& 0" keeps it compiled out.
212 #if defined(DEBUG_bzbarsky) && 0
213 for (unsigned int foo
= 0; foo
< mByteData
->GetLength(); ++foo
) {
214 fprintf(stderr
, "%c", mByteData
->GetBuffer()[foo
]);
216 fprintf(stderr
, "\n");
// Nothing new was read and no bytes are left over from the previous round.
218 if (nb
<= 0 && mLeftOverBytes
== 0) {
224 NS_ASSERTION(PRUint32(nb
) + mLeftOverBytes
== mByteData
->GetLength(),
225 "mByteData is lying to us somewhere");
227 // Now convert as much of the byte buffer to unicode as possible
// Start with an empty output buffer and no input consumed.
228 mUnicharDataOffset
= 0;
229 mUnicharDataLength
= 0;
230 PRUint32 srcConsumed
= 0;
// Before the call: bytes not yet converted and room left in the output.
// After the call the code below treats dstLen as the number of characters
// produced and srcLen as the number of bytes consumed.
232 PRInt32 srcLen
= mByteData
->GetLength() - srcConsumed
;
233 PRInt32 dstLen
= mUnicharData
->GetBufferSize() - mUnicharDataLength
;
234 *aErrorCode
= mConverter
->Convert(mByteData
->GetBuffer()+srcConsumed
,
236 mUnicharData
->GetBuffer()+mUnicharDataLength
,
238 mUnicharDataLength
+= dstLen
;
239 // XXX if srcLen is negative, we want to drop the _first_ byte in
240 // the erroneous byte sequence and try again. This is not quite
241 // possible right now -- see bug 160784
// NOTE(review): srcConsumed is a PRUint32 while srcLen is a PRInt32, so a
// negative srcLen is added using unsigned arithmetic here.
242 srcConsumed
+= srcLen
;
// Conversion failed and a replacement character is configured: append it to
// the output and keep going.
243 if (NS_FAILED(*aErrorCode
) && mReplacementChar
) {
244 NS_ASSERTION(0 < mUnicharData
->GetBufferSize() - mUnicharDataLength
,
245 "Decoder returned an error but filled the output buffer! "
246 "Should not happen.");
247 mUnicharData
->GetBuffer()[mUnicharDataLength
++] = mReplacementChar
;
249 // XXX this is needed to make sure we don't underrun our buffer;
// NOTE(review): srcConsumed is declared PRUint32, so if PR_MAX is a plain
// "larger of two values" macro this statement cannot change it -- confirm
// whether the intended underrun guard is effective.
251 srcConsumed
= PR_MAX(srcConsumed
, 0);
254 NS_ASSERTION(srcConsumed
<= mByteData
->GetLength(),
255 "Whoa. The converter should have returned NS_OK_UDEC_MOREINPUT before this point!");
// Retry only while a replacement character is configured and the last
// Convert() call failed.
256 } while (mReplacementChar
&&
257 NS_FAILED(*aErrorCode
));
// Bytes that were read but not consumed are carried over to the next Fill().
259 mLeftOverBytes
= mByteData
->GetLength() - srcConsumed
;
261 return mUnicharDataLength
;
// ReadLine: reads one line from this stream into aLine by delegating to
// NS_ReadLine() with mLineBuffer. aResult is passed through to NS_ReadLine().
// NOTE(review): the line immediately before the NS_InitLineBuffer() call and
// the braces are not visible in this listing; presumably the buffer is only
// initialized when it does not exist yet -- confirm.
265 nsConverterInputStream::ReadLine(nsAString
& aLine
, PRBool
* aResult
)
// Set up mLineBuffer; a failure is returned to the caller unchanged.
268 nsresult rv
= NS_InitLineBuffer(&mLineBuffer
);
269 if (NS_FAILED(rv
)) return rv
;
271 return NS_ReadLine(this, mLineBuffer
, aLine
, aResult
);