2 ==============================================================================
4 This file is part of the JUCE library.
5 Copyright (c) 2022 - Raw Material Software Limited
7 JUCE is an open source library subject to commercial or open-source
10 The code included in this file is provided under the terms of the ISC license
11 http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12 To use, copy, modify, and/or distribute this software for any purpose with or
13 without fee is hereby granted provided that the above copyright notice and
14 this permission notice appear in all copies.
16 JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17 EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
20 ==============================================================================
26 //==============================================================================
28 Wraps a pointer to a null-terminated UTF-8 character string, and provides
29 various methods to operate on the data.
30 @see CharPointer_UTF16, CharPointer_UTF32
34 class CharPointer_UTF8 final
37 using CharType
= char;
39 explicit CharPointer_UTF8 (const CharType
* rawPointer
) noexcept
40 : data (const_cast<CharType
*> (rawPointer
))
44 CharPointer_UTF8 (const CharPointer_UTF8
& other
) = default;
46 CharPointer_UTF8
operator= (CharPointer_UTF8 other
) noexcept
52 CharPointer_UTF8
operator= (const CharType
* text
) noexcept
54 data
= const_cast<CharType
*> (text
);
58 /** This is a pointer comparison, it doesn't compare the actual text. */
59 bool operator== (CharPointer_UTF8 other
) const noexcept
{ return data
== other
.data
; }
60 bool operator!= (CharPointer_UTF8 other
) const noexcept
{ return data
!= other
.data
; }
61 bool operator<= (CharPointer_UTF8 other
) const noexcept
{ return data
<= other
.data
; }
62 bool operator< (CharPointer_UTF8 other
) const noexcept
{ return data
< other
.data
; }
63 bool operator>= (CharPointer_UTF8 other
) const noexcept
{ return data
>= other
.data
; }
64 bool operator> (CharPointer_UTF8 other
) const noexcept
{ return data
> other
.data
; }
66 /** Returns the address that this pointer is pointing to. */
67 CharType
* getAddress() const noexcept
{ return data
; }
69 /** Returns the address that this pointer is pointing to. */
70 operator const CharType
*() const noexcept
{ return data
; }
72 /** Returns true if this pointer is pointing to a null character. */
73 bool isEmpty() const noexcept
{ return *data
== 0; }
75 /** Returns true if this pointer is not pointing to a null character. */
76 bool isNotEmpty() const noexcept
{ return *data
!= 0; }
78 /** Returns the unicode character that this pointer is pointing to. */
79 juce_wchar
operator*() const noexcept
81 auto byte
= (signed char) *data
;
84 return (juce_wchar
) (uint8
) byte
;
86 uint32 n
= (uint32
) (uint8
) byte
;
89 int numExtraValues
= 0;
91 while ((n
& bit
) != 0 && bit
> 0x8)
100 for (int i
= 1; i
<= numExtraValues
; ++i
)
102 auto nextByte
= (uint32
) (uint8
) data
[i
];
104 if ((nextByte
& 0xc0) != 0x80)
108 n
|= (nextByte
& 0x3f);
111 return (juce_wchar
) n
;
114 /** Moves this pointer along to the next character in the string. */
115 CharPointer_UTF8
& operator++() noexcept
117 jassert (*data
!= 0); // trying to advance past the end of the string?
118 auto n
= (signed char) *data
++;
124 while ((static_cast<uint8
> (n
) & bit
) != 0 && bit
> 0x8)
127 bit
= static_cast<uint8
> (bit
>> 1);
134 /** Moves this pointer back to the previous character in the string. */
135 CharPointer_UTF8
operator--() noexcept
139 while ((*--data
& 0xc0) == 0x80 && ++count
< 4)
145 /** Returns the character that this pointer is currently pointing to, and then
146 advances the pointer to point to the next character. */
147 juce_wchar
getAndAdvance() noexcept
149 auto byte
= (signed char) *data
++;
152 return (juce_wchar
) (uint8
) byte
;
154 uint32 n
= (uint32
) (uint8
) byte
;
157 int numExtraValues
= 0;
159 while ((n
& bit
) != 0 && bit
> 0x8)
168 while (--numExtraValues
>= 0)
170 auto nextByte
= (uint32
) (uint8
) *data
;
172 if ((nextByte
& 0xc0) != 0x80)
177 n
|= (nextByte
& 0x3f);
180 return (juce_wchar
) n
;
183 /** Moves this pointer along to the next character in the string. */
184 CharPointer_UTF8
operator++ (int) noexcept
186 CharPointer_UTF8
temp (*this);
191 /** Moves this pointer forwards by the specified number of characters. */
192 void operator+= (int numToSkip
) noexcept
196 while (++numToSkip
<= 0)
201 while (--numToSkip
>= 0)
206 /** Moves this pointer backwards by the specified number of characters. */
207 void operator-= (int numToSkip
) noexcept
209 operator+= (-numToSkip
);
212 /** Returns the character at a given character index from the start of the string. */
213 juce_wchar
operator[] (int characterIndex
) const noexcept
220 /** Returns a pointer which is moved forwards from this one by the specified number of characters. */
221 CharPointer_UTF8
operator+ (int numToSkip
) const noexcept
228 /** Returns a pointer which is moved backwards from this one by the specified number of characters. */
229 CharPointer_UTF8
operator- (int numToSkip
) const noexcept
236 /** Returns the number of characters in this string. */
237 size_t length() const noexcept
244 auto n
= (uint32
) (uint8
) *d
++;
248 while ((*d
& 0xc0) == 0x80)
260 /** Returns the number of characters in this string, or the given value, whichever is lower. */
261 size_t lengthUpTo (const size_t maxCharsToCount
) const noexcept
263 return CharacterFunctions::lengthUpTo (*this, maxCharsToCount
);
266 /** Returns the number of characters in this string, or up to the given end pointer, whichever is lower. */
267 size_t lengthUpTo (const CharPointer_UTF8 end
) const noexcept
269 return CharacterFunctions::lengthUpTo (*this, end
);
272 /** Returns the number of bytes that are used to represent this string.
273 This includes the terminating null character.
275 size_t sizeInBytes() const noexcept
277 JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6387)
278 jassert (data
!= nullptr);
279 return strlen (data
) + 1;
280 JUCE_END_IGNORE_WARNINGS_MSVC
283 /** Returns the number of bytes that would be needed to represent the given
284 unicode character in this encoding format.
286 static size_t getBytesRequiredFor (const juce_wchar charToWrite
) noexcept
289 auto c
= (uint32
) charToWrite
;
305 /** Returns the number of bytes that would be needed to represent the given
306 string in this encoding format.
307 The value returned does NOT include the terminating null character.
309 template <class CharPointer
>
310 static size_t getBytesRequiredFor (CharPointer text
) noexcept
314 while (auto n
= text
.getAndAdvance())
315 count
+= getBytesRequiredFor (n
);
320 /** Returns a pointer to the null character that terminates this string. */
321 CharPointer_UTF8
findTerminatingNull() const noexcept
323 return CharPointer_UTF8 (data
+ strlen (data
));
326 /** Writes a unicode character to this string, and advances this pointer to point to the next position. */
327 void write (const juce_wchar charToWrite
) noexcept
329 auto c
= (uint32
) charToWrite
;
333 int numExtraBytes
= 1;
341 *data
++ = (CharType
) ((uint32
) (0xff << (7 - numExtraBytes
)) | (c
>> (numExtraBytes
* 6)));
343 while (--numExtraBytes
>= 0)
344 *data
++ = (CharType
) (0x80 | (0x3f & (c
>> (numExtraBytes
* 6))));
348 *data
++ = (CharType
) c
;
352 /** Writes a null character to this string (leaving the pointer's position unchanged). */
353 void writeNull() const noexcept
358 /** Copies a source string to this pointer, advancing this pointer as it goes. */
359 template <typename CharPointer
>
360 void writeAll (const CharPointer src
) noexcept
362 CharacterFunctions::copyAll (*this, src
);
365 /** Copies a source string to this pointer, advancing this pointer as it goes. */
366 void writeAll (const CharPointer_UTF8 src
) noexcept
370 while ((*data
= *s
) != 0)
377 /** Copies a source string to this pointer, advancing this pointer as it goes.
378 The maxDestBytes parameter specifies the maximum number of bytes that can be written
379 to the destination buffer before stopping.
381 template <typename CharPointer
>
382 size_t writeWithDestByteLimit (const CharPointer src
, const size_t maxDestBytes
) noexcept
384 return CharacterFunctions::copyWithDestByteLimit (*this, src
, maxDestBytes
);
387 /** Copies a source string to this pointer, advancing this pointer as it goes.
388 The maxChars parameter specifies the maximum number of characters that can be
389 written to the destination buffer before stopping (including the terminating null).
391 template <typename CharPointer
>
392 void writeWithCharLimit (const CharPointer src
, const int maxChars
) noexcept
394 CharacterFunctions::copyWithCharLimit (*this, src
, maxChars
);
397 /** Compares this string with another one. */
398 template <typename CharPointer
>
399 int compare (const CharPointer other
) const noexcept
401 return CharacterFunctions::compare (*this, other
);
404 /** Compares this string with another one, up to a specified number of characters. */
405 template <typename CharPointer
>
406 int compareUpTo (const CharPointer other
, const int maxChars
) const noexcept
408 return CharacterFunctions::compareUpTo (*this, other
, maxChars
);
411 /** Compares this string with another one. */
412 template <typename CharPointer
>
413 int compareIgnoreCase (const CharPointer other
) const noexcept
415 return CharacterFunctions::compareIgnoreCase (*this, other
);
418 /** Compares this string with another one. */
419 int compareIgnoreCase (const CharPointer_UTF8 other
) const noexcept
421 return CharacterFunctions::compareIgnoreCase (*this, other
);
424 /** Compares this string with another one, up to a specified number of characters. */
425 template <typename CharPointer
>
426 int compareIgnoreCaseUpTo (const CharPointer other
, const int maxChars
) const noexcept
428 return CharacterFunctions::compareIgnoreCaseUpTo (*this, other
, maxChars
);
431 /** Returns the character index of a substring, or -1 if it isn't found. */
432 template <typename CharPointer
>
433 int indexOf (const CharPointer stringToFind
) const noexcept
435 return CharacterFunctions::indexOf (*this, stringToFind
);
438 /** Returns the character index of a unicode character, or -1 if it isn't found. */
439 int indexOf (const juce_wchar charToFind
) const noexcept
441 return CharacterFunctions::indexOfChar (*this, charToFind
);
444 /** Returns the character index of a unicode character, or -1 if it isn't found. */
445 int indexOf (const juce_wchar charToFind
, const bool ignoreCase
) const noexcept
447 return ignoreCase
? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind
)
448 : CharacterFunctions::indexOfChar (*this, charToFind
);
451 /** Returns true if the first character of this string is whitespace. */
452 bool isWhitespace() const noexcept
{ return CharacterFunctions::isWhitespace ((juce_wchar
) *(*this)); }
453 /** Returns true if the first character of this string is a digit. */
454 bool isDigit() const noexcept
{ const CharType c
= *data
; return c
>= '0' && c
<= '9'; }
455 /** Returns true if the first character of this string is a letter. */
456 bool isLetter() const noexcept
{ return CharacterFunctions::isLetter (operator*()) != 0; }
457 /** Returns true if the first character of this string is a letter or digit. */
458 bool isLetterOrDigit() const noexcept
{ return CharacterFunctions::isLetterOrDigit (operator*()) != 0; }
459 /** Returns true if the first character of this string is upper-case. */
460 bool isUpperCase() const noexcept
{ return CharacterFunctions::isUpperCase (operator*()) != 0; }
461 /** Returns true if the first character of this string is lower-case. */
462 bool isLowerCase() const noexcept
{ return CharacterFunctions::isLowerCase (operator*()) != 0; }
464 /** Returns an upper-case version of the first character of this string. */
465 juce_wchar
toUpperCase() const noexcept
{ return CharacterFunctions::toUpperCase (operator*()); }
466 /** Returns a lower-case version of the first character of this string. */
467 juce_wchar
toLowerCase() const noexcept
{ return CharacterFunctions::toLowerCase (operator*()); }
469 /** Parses this string as a 32-bit integer. */
470 int getIntValue32() const noexcept
{ return atoi (data
); }
472 /** Parses this string as a 64-bit integer. */
473 int64
getIntValue64() const noexcept
475 #if JUCE_WINDOWS && ! JUCE_MINGW
476 return _atoi64 (data
);
482 /** Parses this string as a floating point double. */
483 double getDoubleValue() const noexcept
{ return CharacterFunctions::getDoubleValue (*this); }
485 /** Returns the first non-whitespace character in the string. */
486 CharPointer_UTF8
findEndOfWhitespace() const noexcept
{ return CharacterFunctions::findEndOfWhitespace (*this); }
488 /** Move this pointer to the first non-whitespace character in the string. */
489 void incrementToEndOfWhitespace() noexcept
{ CharacterFunctions::incrementToEndOfWhitespace (*this); }
491 /** Returns true if the given unicode character can be represented in this encoding. */
492 static bool canRepresent (juce_wchar character
) noexcept
494 return ((uint32
) character
) < (uint32
) 0x10ffff;
497 /** Returns true if this data contains a valid string in this encoding. */
498 static bool isValidString (const CharType
* dataToTest
, int maxBytesToRead
)
500 while (--maxBytesToRead
>= 0 && *dataToTest
!= 0)
502 auto byte
= (signed char) *dataToTest
++;
507 int numExtraValues
= 0;
509 while ((byte
& bit
) != 0)
517 if (bit
== 8 && (numExtraValues
> maxBytesToRead
518 || *CharPointer_UTF8 (dataToTest
- 1) > 0x10ffff))
522 if (numExtraValues
== 0)
525 maxBytesToRead
-= numExtraValues
;
526 if (maxBytesToRead
< 0)
529 while (--numExtraValues
>= 0)
530 if ((*dataToTest
++ & 0xc0) != 0x80)
538 /** Atomically swaps this pointer for a new value, returning the previous value. */
539 CharPointer_UTF8
atomicSwap (const CharPointer_UTF8 newValue
)
541 return CharPointer_UTF8 (reinterpret_cast<Atomic
<CharType
*>&> (data
).exchange (newValue
.data
));
544 /** These values are the byte-order mark (BOM) values for a UTF-8 stream. */
547 byteOrderMark1
= 0xef,
548 byteOrderMark2
= 0xbb,
549 byteOrderMark3
= 0xbf
552 /** Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM).
553 The pointer must not be null, and must point to at least 3 valid bytes.
555 static bool isByteOrderMark (const void* possibleByteOrder
) noexcept
557 JUCE_BEGIN_IGNORE_WARNINGS_MSVC (28182)
558 jassert (possibleByteOrder
!= nullptr);
559 auto c
= static_cast<const uint8
*> (possibleByteOrder
);
561 return c
[0] == (uint8
) byteOrderMark1
562 && c
[1] == (uint8
) byteOrderMark2
563 && c
[2] == (uint8
) byteOrderMark3
;
564 JUCE_END_IGNORE_WARNINGS_MSVC