2 * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "wtf/text/TextCodecUTF16.h"
29 #include "wtf/PassOwnPtr.h"
30 #include "wtf/text/CString.h"
31 #include "wtf/text/CharacterNames.h"
32 #include "wtf/text/StringBuffer.h"
33 #include "wtf/text/WTFString.h"
39 void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar
)
41 registrar("UTF-16LE", "UTF-16LE");
42 registrar("UTF-16BE", "UTF-16BE");
44 registrar("ISO-10646-UCS-2", "UTF-16LE");
45 registrar("UCS-2", "UTF-16LE");
46 registrar("UTF-16", "UTF-16LE");
47 registrar("Unicode", "UTF-16LE");
48 registrar("csUnicode", "UTF-16LE");
49 registrar("unicodeFEFF", "UTF-16LE");
51 registrar("unicodeFFFE", "UTF-16BE");
54 static PassOwnPtr
<TextCodec
> newStreamingTextDecoderUTF16LE(const TextEncoding
&, const void*)
56 return adoptPtr(new TextCodecUTF16(true));
59 static PassOwnPtr
<TextCodec
> newStreamingTextDecoderUTF16BE(const TextEncoding
&, const void*)
61 return adoptPtr(new TextCodecUTF16(false));
64 void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar
)
66 registrar("UTF-16LE", newStreamingTextDecoderUTF16LE
, 0);
67 registrar("UTF-16BE", newStreamingTextDecoderUTF16BE
, 0);
70 String
TextCodecUTF16::decode(const char* bytes
, size_t length
, FlushBehavior flush
, bool, bool& sawError
)
72 // For compatibility reasons, ignore flush from fetch EOF.
73 const bool reallyFlush
= flush
!= DoNotFlush
&& flush
!= FetchEOF
;
76 if (!reallyFlush
|| !m_haveBufferedByte
)
79 return String(&replacementCharacter
, 1);
82 // FIXME: This should generate an error if there is an unpaired surrogate.
84 const unsigned char* p
= reinterpret_cast<const unsigned char*>(bytes
);
85 size_t numBytes
= length
+ m_haveBufferedByte
;
86 size_t numCharsIn
= numBytes
/ 2;
87 size_t numCharsOut
= ((numBytes
& 1) && reallyFlush
) ? numCharsIn
+ 1 : numCharsIn
;
89 StringBuffer
<UChar
> buffer(numCharsOut
);
90 UChar
* q
= buffer
.characters();
92 if (m_haveBufferedByte
) {
95 c
= m_bufferedByte
| (p
[0] << 8);
97 c
= (m_bufferedByte
<< 8) | p
[0];
99 m_haveBufferedByte
= false;
104 if (m_littleEndian
) {
105 for (size_t i
= 0; i
< numCharsIn
; ++i
) {
106 UChar c
= p
[0] | (p
[1] << 8);
111 for (size_t i
= 0; i
< numCharsIn
; ++i
) {
112 UChar c
= (p
[0] << 8) | p
[1];
119 ASSERT(!m_haveBufferedByte
);
123 *q
++ = replacementCharacter
;
125 m_haveBufferedByte
= true;
126 m_bufferedByte
= p
[0];
130 buffer
.shrink(q
- buffer
.characters());
132 return String::adopt(buffer
);
135 CString
TextCodecUTF16::encode(const UChar
* characters
, size_t length
, UnencodableHandling
)
137 // We need to be sure we can double the length without overflowing.
138 // Since the passed-in length is the length of an actual existing
139 // character buffer, each character is two bytes, and we know
140 // the buffer doesn't occupy the entire address space, we can
141 // assert here that doubling the length does not overflow size_t
142 // and there's no need for a runtime check.
143 ASSERT(length
<= numeric_limits
<size_t>::max() / 2);
146 CString result
= CString::newUninitialized(length
* 2, bytes
);
148 // FIXME: CString is not a reasonable data structure for encoded UTF-16, which will have
149 // null characters inside it. Perhaps the result of encode should not be a CString.
150 if (m_littleEndian
) {
151 for (size_t i
= 0; i
< length
; ++i
) {
152 UChar c
= characters
[i
];
153 bytes
[i
* 2] = static_cast<char>(c
);
154 bytes
[i
* 2 + 1] = c
>> 8;
157 for (size_t i
= 0; i
< length
; ++i
) {
158 UChar c
= characters
[i
];
159 bytes
[i
* 2] = c
>> 8;
160 bytes
[i
* 2 + 1] = static_cast<char>(c
);
167 CString
TextCodecUTF16::encode(const LChar
* characters
, size_t length
, UnencodableHandling
)
169 // In the LChar case, we do actually need to perform this check in release. :)
170 RELEASE_ASSERT(length
<= numeric_limits
<size_t>::max() / 2);
173 CString result
= CString::newUninitialized(length
* 2, bytes
);
175 if (m_littleEndian
) {
176 for (size_t i
= 0; i
< length
; ++i
) {
177 bytes
[i
* 2] = characters
[i
];
178 bytes
[i
* 2 + 1] = 0;
181 for (size_t i
= 0; i
< length
; ++i
) {
183 bytes
[i
* 2 + 1] = characters
[i
];