2 ==============================================================================
4 This file is part of the JUCE library - "Jules' Utility Class Extensions"
5 Copyright 2004-11 by Raw Material Software Ltd.
7 ------------------------------------------------------------------------------
9 JUCE can be redistributed and/or modified under the terms of the GNU General
10 Public License (Version 2), as published by the Free Software Foundation.
11 A copy of the license is included in the JUCE distribution, or can be found
12 online at www.gnu.org/licenses.
14 JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 ------------------------------------------------------------------------------
20 To release a closed-source product which uses JUCE, commercial licenses are
21 available: visit www.rawmaterialsoftware.com/juce for more information.
23 ==============================================================================
26 #ifndef __JUCE_CHARPOINTER_UTF16_JUCEHEADER__
27 #define __JUCE_CHARPOINTER_UTF16_JUCEHEADER__
30 //==============================================================================
32 Wraps a pointer to a null-terminated UTF-16 character string, and provides
33 various methods to operate on the data.
34 @see CharPointer_UTF8, CharPointer_UTF32
36 class CharPointer_UTF16
39 #if JUCE_NATIVE_WCHAR_IS_UTF16
40 typedef wchar_t CharType
;
42 typedef int16 CharType
;
45 inline explicit CharPointer_UTF16 (const CharType
* const rawPointer
) noexcept
46 : data (const_cast <CharType
*> (rawPointer
))
50 inline CharPointer_UTF16 (const CharPointer_UTF16
& other
) noexcept
55 inline CharPointer_UTF16
& operator= (const CharPointer_UTF16
& other
) noexcept
61 inline CharPointer_UTF16
& operator= (const CharType
* text
) noexcept
63 data
= const_cast <CharType
*> (text
);
67 /** This is a pointer comparison, it doesn't compare the actual text. */
68 inline bool operator== (const CharPointer_UTF16
& other
) const noexcept
{ return data
== other
.data
; }
69 inline bool operator!= (const CharPointer_UTF16
& other
) const noexcept
{ return data
!= other
.data
; }
70 inline bool operator<= (const CharPointer_UTF16
& other
) const noexcept
{ return data
<= other
.data
; }
71 inline bool operator< (const CharPointer_UTF16
& other
) const noexcept
{ return data
< other
.data
; }
72 inline bool operator>= (const CharPointer_UTF16
& other
) const noexcept
{ return data
>= other
.data
; }
73 inline bool operator> (const CharPointer_UTF16
& other
) const noexcept
{ return data
> other
.data
; }
75 /** Returns the address that this pointer is pointing to. */
76 inline CharType
* getAddress() const noexcept
{ return data
; }
78 /** Returns the address that this pointer is pointing to. */
79 inline operator const CharType
*() const noexcept
{ return data
; }
81 /** Returns true if this pointer is pointing to a null character. */
82 inline bool isEmpty() const noexcept
{ return *data
== 0; }
84 /** Returns the unicode character that this pointer is pointing to. */
85 juce_wchar
operator*() const noexcept
87 uint32 n
= (uint32
) (uint16
) *data
;
89 if (n
>= 0xd800 && n
<= 0xdfff && ((uint32
) (uint16
) data
[1]) >= 0xdc00)
90 n
= 0x10000 + (((n
- 0xd800) << 10) | (((uint32
) (uint16
) data
[1]) - 0xdc00));
92 return (juce_wchar
) n
;
95 /** Moves this pointer along to the next character in the string. */
96 CharPointer_UTF16
& operator++() noexcept
98 const juce_wchar n
= *data
++;
100 if (n
>= 0xd800 && n
<= 0xdfff && ((uint32
) (uint16
) *data
) >= 0xdc00)
106 /** Moves this pointer back to the previous character in the string. */
107 CharPointer_UTF16
& operator--() noexcept
109 const juce_wchar n
= *--data
;
111 if (n
>= 0xdc00 && n
<= 0xdfff)
117 /** Returns the character that this pointer is currently pointing to, and then
118 advances the pointer to point to the next character. */
119 juce_wchar
getAndAdvance() noexcept
121 uint32 n
= (uint32
) (uint16
) *data
++;
123 if (n
>= 0xd800 && n
<= 0xdfff && ((uint32
) (uint16
) *data
) >= 0xdc00)
124 n
= 0x10000 + ((((n
- 0xd800) << 10) | (((uint32
) (uint16
) *data
++) - 0xdc00)));
126 return (juce_wchar
) n
;
129 /** Moves this pointer along to the next character in the string. */
130 CharPointer_UTF16
operator++ (int) noexcept
132 CharPointer_UTF16
temp (*this);
137 /** Moves this pointer forwards by the specified number of characters. */
138 void operator+= (int numToSkip
) noexcept
142 while (++numToSkip
<= 0)
147 while (--numToSkip
>= 0)
152 /** Moves this pointer backwards by the specified number of characters. */
153 void operator-= (int numToSkip
) noexcept
155 operator+= (-numToSkip
);
158 /** Returns the character at a given character index from the start of the string. */
159 juce_wchar
operator[] (const int characterIndex
) const noexcept
161 CharPointer_UTF16
p (*this);
166 /** Returns a pointer which is moved forwards from this one by the specified number of characters. */
167 CharPointer_UTF16
operator+ (const int numToSkip
) const noexcept
169 CharPointer_UTF16
p (*this);
174 /** Returns a pointer which is moved backwards from this one by the specified number of characters. */
175 CharPointer_UTF16
operator- (const int numToSkip
) const noexcept
177 CharPointer_UTF16
p (*this);
182 /** Writes a unicode character to this string, and advances this pointer to point to the next position. */
183 void write (juce_wchar charToWrite
) noexcept
185 if (charToWrite
>= 0x10000)
187 charToWrite
-= 0x10000;
188 *data
++ = (CharType
) (0xd800 + (charToWrite
>> 10));
189 *data
++ = (CharType
) (0xdc00 + (charToWrite
& 0x3ff));
193 *data
++ = (CharType
) charToWrite
;
197 /** Writes a null character to this string (leaving the pointer's position unchanged). */
198 inline void writeNull() const noexcept
203 /** Returns the number of characters in this string. */
204 size_t length() const noexcept
206 const CharType
* d
= data
;
213 if (n
>= 0xd800 && n
<= 0xdfff)
227 /** Returns the number of characters in this string, or the given value, whichever is lower. */
228 size_t lengthUpTo (const size_t maxCharsToCount
) const noexcept
230 return CharacterFunctions::lengthUpTo (*this, maxCharsToCount
);
233 /** Returns the number of characters in this string, or up to the given end pointer, whichever is lower. */
234 size_t lengthUpTo (const CharPointer_UTF16
& end
) const noexcept
236 return CharacterFunctions::lengthUpTo (*this, end
);
239 /** Returns the number of bytes that are used to represent this string.
240 This includes the terminating null character.
242 size_t sizeInBytes() const noexcept
244 return sizeof (CharType
) * (findNullIndex (data
) + 1);
247 /** Returns the number of bytes that would be needed to represent the given
248 unicode character in this encoding format.
250 static size_t getBytesRequiredFor (const juce_wchar charToWrite
) noexcept
252 return (charToWrite
>= 0x10000) ? (sizeof (CharType
) * 2) : sizeof (CharType
);
255 /** Returns the number of bytes that would be needed to represent the given
256 string in this encoding format.
257 The value returned does NOT include the terminating null character.
259 template <class CharPointer
>
260 static size_t getBytesRequiredFor (CharPointer text
) noexcept
265 while ((n
= text
.getAndAdvance()) != 0)
266 count
+= getBytesRequiredFor (n
);
271 /** Returns a pointer to the null character that terminates this string. */
272 CharPointer_UTF16
findTerminatingNull() const noexcept
274 const CharType
* t
= data
;
279 return CharPointer_UTF16 (t
);
282 /** Copies a source string to this pointer, advancing this pointer as it goes. */
283 template <typename CharPointer
>
284 void writeAll (const CharPointer
& src
) noexcept
286 CharacterFunctions::copyAll (*this, src
);
289 /** Copies a source string to this pointer, advancing this pointer as it goes. */
290 void writeAll (const CharPointer_UTF16
& src
) noexcept
292 const CharType
* s
= src
.data
;
294 while ((*data
= *s
) != 0)
301 /** Copies a source string to this pointer, advancing this pointer as it goes.
302 The maxDestBytes parameter specifies the maximum number of bytes that can be written
303 to the destination buffer before stopping.
305 template <typename CharPointer
>
306 int writeWithDestByteLimit (const CharPointer
& src
, const int maxDestBytes
) noexcept
308 return CharacterFunctions::copyWithDestByteLimit (*this, src
, maxDestBytes
);
311 /** Copies a source string to this pointer, advancing this pointer as it goes.
312 The maxChars parameter specifies the maximum number of characters that can be
313 written to the destination buffer before stopping (including the terminating null).
315 template <typename CharPointer
>
316 void writeWithCharLimit (const CharPointer
& src
, const int maxChars
) noexcept
318 CharacterFunctions::copyWithCharLimit (*this, src
, maxChars
);
321 /** Compares this string with another one. */
322 template <typename CharPointer
>
323 int compare (const CharPointer
& other
) const noexcept
325 return CharacterFunctions::compare (*this, other
);
328 /** Compares this string with another one, up to a specified number of characters. */
329 template <typename CharPointer
>
330 int compareUpTo (const CharPointer
& other
, const int maxChars
) const noexcept
332 return CharacterFunctions::compareUpTo (*this, other
, maxChars
);
335 /** Compares this string with another one. */
336 template <typename CharPointer
>
337 int compareIgnoreCase (const CharPointer
& other
) const noexcept
339 return CharacterFunctions::compareIgnoreCase (*this, other
);
342 /** Compares this string with another one, up to a specified number of characters. */
343 template <typename CharPointer
>
344 int compareIgnoreCaseUpTo (const CharPointer
& other
, const int maxChars
) const noexcept
346 return CharacterFunctions::compareIgnoreCaseUpTo (*this, other
, maxChars
);
349 #if JUCE_WINDOWS && ! DOXYGEN
350 int compareIgnoreCase (const CharPointer_UTF16
& other
) const noexcept
352 return _wcsicmp (data
, other
.data
);
355 int compareIgnoreCaseUpTo (const CharPointer_UTF16
& other
, int maxChars
) const noexcept
357 return _wcsnicmp (data
, other
.data
, maxChars
);
360 int indexOf (const CharPointer_UTF16
& stringToFind
) const noexcept
362 const CharType
* const t
= wcsstr (data
, stringToFind
.getAddress());
363 return t
== nullptr ? -1 : (int) (t
- data
);
367 /** Returns the character index of a substring, or -1 if it isn't found. */
368 template <typename CharPointer
>
369 int indexOf (const CharPointer
& stringToFind
) const noexcept
371 return CharacterFunctions::indexOf (*this, stringToFind
);
374 /** Returns the character index of a unicode character, or -1 if it isn't found. */
375 int indexOf (const juce_wchar charToFind
) const noexcept
377 return CharacterFunctions::indexOfChar (*this, charToFind
);
380 /** Returns the character index of a unicode character, or -1 if it isn't found. */
381 int indexOf (const juce_wchar charToFind
, const bool ignoreCase
) const noexcept
383 return ignoreCase
? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind
)
384 : CharacterFunctions::indexOfChar (*this, charToFind
);
387 /** Returns true if the first character of this string is whitespace. */
388 bool isWhitespace() const noexcept
{ return CharacterFunctions::isWhitespace (operator*()) != 0; }
389 /** Returns true if the first character of this string is a digit. */
390 bool isDigit() const noexcept
{ return CharacterFunctions::isDigit (operator*()) != 0; }
391 /** Returns true if the first character of this string is a letter. */
392 bool isLetter() const noexcept
{ return CharacterFunctions::isLetter (operator*()) != 0; }
393 /** Returns true if the first character of this string is a letter or digit. */
394 bool isLetterOrDigit() const noexcept
{ return CharacterFunctions::isLetterOrDigit (operator*()) != 0; }
395 /** Returns true if the first character of this string is upper-case. */
396 bool isUpperCase() const noexcept
{ return CharacterFunctions::isUpperCase (operator*()) != 0; }
397 /** Returns true if the first character of this string is lower-case. */
398 bool isLowerCase() const noexcept
{ return CharacterFunctions::isLowerCase (operator*()) != 0; }
400 /** Returns an upper-case version of the first character of this string. */
401 juce_wchar
toUpperCase() const noexcept
{ return CharacterFunctions::toUpperCase (operator*()); }
402 /** Returns a lower-case version of the first character of this string. */
403 juce_wchar
toLowerCase() const noexcept
{ return CharacterFunctions::toLowerCase (operator*()); }
405 /** Parses this string as a 32-bit integer. */
406 int getIntValue32() const noexcept
411 return CharacterFunctions::getIntValue
<int, CharPointer_UTF16
> (*this);
415 /** Parses this string as a 64-bit integer. */
416 int64
getIntValue64() const noexcept
419 return _wtoi64 (data
);
421 return CharacterFunctions::getIntValue
<int64
, CharPointer_UTF16
> (*this);
425 /** Parses this string as a floating point double. */
426 double getDoubleValue() const noexcept
{ return CharacterFunctions::getDoubleValue (*this); }
428 /** Returns the first non-whitespace character in the string. */
429 CharPointer_UTF16
findEndOfWhitespace() const noexcept
{ return CharacterFunctions::findEndOfWhitespace (*this); }
431 /** Returns true if the given unicode character can be represented in this encoding. */
432 static bool canRepresent (juce_wchar character
) noexcept
434 return ((unsigned int) character
) < (unsigned int) 0x10ffff
435 && (((unsigned int) character
) < 0xd800 || ((unsigned int) character
) > 0xdfff);
438 /** Returns true if this data contains a valid string in this encoding. */
439 static bool isValidString (const CharType
* dataToTest
, int maxBytesToRead
)
441 maxBytesToRead
/= sizeof (CharType
);
443 while (--maxBytesToRead
>= 0 && *dataToTest
!= 0)
445 const uint32 n
= (uint32
) (uint16
) *dataToTest
++;
457 const uint32 nextChar
= (uint32
) (uint16
) *dataToTest
++;
459 if (nextChar
< 0xdc00 || nextChar
> 0xdfff)
468 /** Atomically swaps this pointer for a new value, returning the previous value. */
469 CharPointer_UTF16
atomicSwap (const CharPointer_UTF16
& newValue
)
471 return CharPointer_UTF16 (reinterpret_cast <Atomic
<CharType
*>&> (data
).exchange (newValue
.data
));
474 /** These values are the byte-order-mark (BOM) values for a UTF-16 stream. */
477 byteOrderMarkBE1
= 0xfe,
478 byteOrderMarkBE2
= 0xff,
479 byteOrderMarkLE1
= 0xff,
480 byteOrderMarkLE2
= 0xfe
486 static int findNullIndex (const CharType
* const t
) noexcept
498 #endif // __JUCE_CHARPOINTER_UTF16_JUCEHEADER__