VST3: fetch midi mappings all at once, use it for note/sound-off
[carla.git] / source / modules / juce_core / text / juce_CharPointer_UTF8.h
blob84b24d3b9223c538078a7a9c07023b2eb440ce91
/*
  ==============================================================================

   This file is part of the JUCE library.
   Copyright (c) 2022 - Raw Material Software Limited

   JUCE is an open source library subject to commercial or open-source
   licensing.

   The code included in this file is provided under the terms of the ISC license
   http://www.isc.org/downloads/software-support-policy/isc-license. Permission
   To use, copy, modify, and/or distribute this software for any purpose with or
   without fee is hereby granted provided that the above copyright notice and
   this permission notice appear in all copies.

   JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
   EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
   DISCLAIMED.

  ==============================================================================
*/
23 namespace juce
26 //==============================================================================
27 /**
28 Wraps a pointer to a null-terminated UTF-8 character string, and provides
29 various methods to operate on the data.
30 @see CharPointer_UTF16, CharPointer_UTF32
32 @tags{Core}
34 class CharPointer_UTF8 final
36 public:
37 using CharType = char;
39 explicit CharPointer_UTF8 (const CharType* rawPointer) noexcept
40 : data (const_cast<CharType*> (rawPointer))
44 CharPointer_UTF8 (const CharPointer_UTF8& other) = default;
46 CharPointer_UTF8 operator= (CharPointer_UTF8 other) noexcept
48 data = other.data;
49 return *this;
52 CharPointer_UTF8 operator= (const CharType* text) noexcept
54 data = const_cast<CharType*> (text);
55 return *this;
58 /** This is a pointer comparison, it doesn't compare the actual text. */
59 bool operator== (CharPointer_UTF8 other) const noexcept { return data == other.data; }
60 bool operator!= (CharPointer_UTF8 other) const noexcept { return data != other.data; }
61 bool operator<= (CharPointer_UTF8 other) const noexcept { return data <= other.data; }
62 bool operator< (CharPointer_UTF8 other) const noexcept { return data < other.data; }
63 bool operator>= (CharPointer_UTF8 other) const noexcept { return data >= other.data; }
64 bool operator> (CharPointer_UTF8 other) const noexcept { return data > other.data; }
66 /** Returns the address that this pointer is pointing to. */
67 CharType* getAddress() const noexcept { return data; }
69 /** Returns the address that this pointer is pointing to. */
70 operator const CharType*() const noexcept { return data; }
72 /** Returns true if this pointer is pointing to a null character. */
73 bool isEmpty() const noexcept { return *data == 0; }
75 /** Returns true if this pointer is not pointing to a null character. */
76 bool isNotEmpty() const noexcept { return *data != 0; }
78 /** Returns the unicode character that this pointer is pointing to. */
79 juce_wchar operator*() const noexcept
81 auto byte = (signed char) *data;
83 if (byte >= 0)
84 return (juce_wchar) (uint8) byte;
86 uint32 n = (uint32) (uint8) byte;
87 uint32 mask = 0x7f;
88 uint32 bit = 0x40;
89 int numExtraValues = 0;
91 while ((n & bit) != 0 && bit > 0x8)
93 mask >>= 1;
94 ++numExtraValues;
95 bit >>= 1;
98 n &= mask;
100 for (int i = 1; i <= numExtraValues; ++i)
102 auto nextByte = (uint32) (uint8) data[i];
104 if ((nextByte & 0xc0) != 0x80)
105 break;
107 n <<= 6;
108 n |= (nextByte & 0x3f);
111 return (juce_wchar) n;
114 /** Moves this pointer along to the next character in the string. */
115 CharPointer_UTF8& operator++() noexcept
117 jassert (*data != 0); // trying to advance past the end of the string?
118 auto n = (signed char) *data++;
120 if (n < 0)
122 uint8 bit = 0x40;
124 while ((static_cast<uint8> (n) & bit) != 0 && bit > 0x8)
126 ++data;
127 bit = static_cast<uint8> (bit >> 1);
131 return *this;
134 /** Moves this pointer back to the previous character in the string. */
135 CharPointer_UTF8 operator--() noexcept
137 int count = 0;
139 while ((*--data & 0xc0) == 0x80 && ++count < 4)
142 return *this;
145 /** Returns the character that this pointer is currently pointing to, and then
146 advances the pointer to point to the next character. */
147 juce_wchar getAndAdvance() noexcept
149 auto byte = (signed char) *data++;
151 if (byte >= 0)
152 return (juce_wchar) (uint8) byte;
154 uint32 n = (uint32) (uint8) byte;
155 uint32 mask = 0x7f;
156 uint32 bit = 0x40;
157 int numExtraValues = 0;
159 while ((n & bit) != 0 && bit > 0x8)
161 mask >>= 1;
162 ++numExtraValues;
163 bit >>= 1;
166 n &= mask;
168 while (--numExtraValues >= 0)
170 auto nextByte = (uint32) (uint8) *data;
172 if ((nextByte & 0xc0) != 0x80)
173 break;
175 ++data;
176 n <<= 6;
177 n |= (nextByte & 0x3f);
180 return (juce_wchar) n;
183 /** Moves this pointer along to the next character in the string. */
184 CharPointer_UTF8 operator++ (int) noexcept
186 CharPointer_UTF8 temp (*this);
187 ++*this;
188 return temp;
191 /** Moves this pointer forwards by the specified number of characters. */
192 void operator+= (int numToSkip) noexcept
194 if (numToSkip < 0)
196 while (++numToSkip <= 0)
197 --*this;
199 else
201 while (--numToSkip >= 0)
202 ++*this;
206 /** Moves this pointer backwards by the specified number of characters. */
207 void operator-= (int numToSkip) noexcept
209 operator+= (-numToSkip);
212 /** Returns the character at a given character index from the start of the string. */
213 juce_wchar operator[] (int characterIndex) const noexcept
215 auto p (*this);
216 p += characterIndex;
217 return *p;
220 /** Returns a pointer which is moved forwards from this one by the specified number of characters. */
221 CharPointer_UTF8 operator+ (int numToSkip) const noexcept
223 auto p (*this);
224 p += numToSkip;
225 return p;
228 /** Returns a pointer which is moved backwards from this one by the specified number of characters. */
229 CharPointer_UTF8 operator- (int numToSkip) const noexcept
231 auto p (*this);
232 p += -numToSkip;
233 return p;
236 /** Returns the number of characters in this string. */
237 size_t length() const noexcept
239 auto* d = data;
240 size_t count = 0;
242 for (;;)
244 auto n = (uint32) (uint8) *d++;
246 if ((n & 0x80) != 0)
248 while ((*d & 0xc0) == 0x80)
249 ++d;
251 else if (n == 0)
252 break;
254 ++count;
257 return count;
260 /** Returns the number of characters in this string, or the given value, whichever is lower. */
261 size_t lengthUpTo (const size_t maxCharsToCount) const noexcept
263 return CharacterFunctions::lengthUpTo (*this, maxCharsToCount);
266 /** Returns the number of characters in this string, or up to the given end pointer, whichever is lower. */
267 size_t lengthUpTo (const CharPointer_UTF8 end) const noexcept
269 return CharacterFunctions::lengthUpTo (*this, end);
272 /** Returns the number of bytes that are used to represent this string.
273 This includes the terminating null character.
275 size_t sizeInBytes() const noexcept
277 JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6387)
278 jassert (data != nullptr);
279 return strlen (data) + 1;
280 JUCE_END_IGNORE_WARNINGS_MSVC
283 /** Returns the number of bytes that would be needed to represent the given
284 unicode character in this encoding format.
286 static size_t getBytesRequiredFor (const juce_wchar charToWrite) noexcept
288 size_t num = 1;
289 auto c = (uint32) charToWrite;
291 if (c >= 0x80)
293 ++num;
294 if (c >= 0x800)
296 ++num;
297 if (c >= 0x10000)
298 ++num;
302 return num;
305 /** Returns the number of bytes that would be needed to represent the given
306 string in this encoding format.
307 The value returned does NOT include the terminating null character.
309 template <class CharPointer>
310 static size_t getBytesRequiredFor (CharPointer text) noexcept
312 size_t count = 0;
314 while (auto n = text.getAndAdvance())
315 count += getBytesRequiredFor (n);
317 return count;
320 /** Returns a pointer to the null character that terminates this string. */
321 CharPointer_UTF8 findTerminatingNull() const noexcept
323 return CharPointer_UTF8 (data + strlen (data));
326 /** Writes a unicode character to this string, and advances this pointer to point to the next position. */
327 void write (const juce_wchar charToWrite) noexcept
329 auto c = (uint32) charToWrite;
331 if (c >= 0x80)
333 int numExtraBytes = 1;
334 if (c >= 0x800)
336 ++numExtraBytes;
337 if (c >= 0x10000)
338 ++numExtraBytes;
341 *data++ = (CharType) ((uint32) (0xff << (7 - numExtraBytes)) | (c >> (numExtraBytes * 6)));
343 while (--numExtraBytes >= 0)
344 *data++ = (CharType) (0x80 | (0x3f & (c >> (numExtraBytes * 6))));
346 else
348 *data++ = (CharType) c;
352 /** Writes a null character to this string (leaving the pointer's position unchanged). */
353 void writeNull() const noexcept
355 *data = 0;
358 /** Copies a source string to this pointer, advancing this pointer as it goes. */
359 template <typename CharPointer>
360 void writeAll (const CharPointer src) noexcept
362 CharacterFunctions::copyAll (*this, src);
365 /** Copies a source string to this pointer, advancing this pointer as it goes. */
366 void writeAll (const CharPointer_UTF8 src) noexcept
368 auto* s = src.data;
370 while ((*data = *s) != 0)
372 ++data;
373 ++s;
377 /** Copies a source string to this pointer, advancing this pointer as it goes.
378 The maxDestBytes parameter specifies the maximum number of bytes that can be written
379 to the destination buffer before stopping.
381 template <typename CharPointer>
382 size_t writeWithDestByteLimit (const CharPointer src, const size_t maxDestBytes) noexcept
384 return CharacterFunctions::copyWithDestByteLimit (*this, src, maxDestBytes);
387 /** Copies a source string to this pointer, advancing this pointer as it goes.
388 The maxChars parameter specifies the maximum number of characters that can be
389 written to the destination buffer before stopping (including the terminating null).
391 template <typename CharPointer>
392 void writeWithCharLimit (const CharPointer src, const int maxChars) noexcept
394 CharacterFunctions::copyWithCharLimit (*this, src, maxChars);
397 /** Compares this string with another one. */
398 template <typename CharPointer>
399 int compare (const CharPointer other) const noexcept
401 return CharacterFunctions::compare (*this, other);
404 /** Compares this string with another one, up to a specified number of characters. */
405 template <typename CharPointer>
406 int compareUpTo (const CharPointer other, const int maxChars) const noexcept
408 return CharacterFunctions::compareUpTo (*this, other, maxChars);
411 /** Compares this string with another one. */
412 template <typename CharPointer>
413 int compareIgnoreCase (const CharPointer other) const noexcept
415 return CharacterFunctions::compareIgnoreCase (*this, other);
418 /** Compares this string with another one. */
419 int compareIgnoreCase (const CharPointer_UTF8 other) const noexcept
421 return CharacterFunctions::compareIgnoreCase (*this, other);
424 /** Compares this string with another one, up to a specified number of characters. */
425 template <typename CharPointer>
426 int compareIgnoreCaseUpTo (const CharPointer other, const int maxChars) const noexcept
428 return CharacterFunctions::compareIgnoreCaseUpTo (*this, other, maxChars);
431 /** Returns the character index of a substring, or -1 if it isn't found. */
432 template <typename CharPointer>
433 int indexOf (const CharPointer stringToFind) const noexcept
435 return CharacterFunctions::indexOf (*this, stringToFind);
438 /** Returns the character index of a unicode character, or -1 if it isn't found. */
439 int indexOf (const juce_wchar charToFind) const noexcept
441 return CharacterFunctions::indexOfChar (*this, charToFind);
444 /** Returns the character index of a unicode character, or -1 if it isn't found. */
445 int indexOf (const juce_wchar charToFind, const bool ignoreCase) const noexcept
447 return ignoreCase ? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind)
448 : CharacterFunctions::indexOfChar (*this, charToFind);
451 /** Returns true if the first character of this string is whitespace. */
452 bool isWhitespace() const noexcept { return CharacterFunctions::isWhitespace ((juce_wchar) *(*this)); }
453 /** Returns true if the first character of this string is a digit. */
454 bool isDigit() const noexcept { const CharType c = *data; return c >= '0' && c <= '9'; }
455 /** Returns true if the first character of this string is a letter. */
456 bool isLetter() const noexcept { return CharacterFunctions::isLetter (operator*()) != 0; }
457 /** Returns true if the first character of this string is a letter or digit. */
458 bool isLetterOrDigit() const noexcept { return CharacterFunctions::isLetterOrDigit (operator*()) != 0; }
459 /** Returns true if the first character of this string is upper-case. */
460 bool isUpperCase() const noexcept { return CharacterFunctions::isUpperCase (operator*()) != 0; }
461 /** Returns true if the first character of this string is lower-case. */
462 bool isLowerCase() const noexcept { return CharacterFunctions::isLowerCase (operator*()) != 0; }
464 /** Returns an upper-case version of the first character of this string. */
465 juce_wchar toUpperCase() const noexcept { return CharacterFunctions::toUpperCase (operator*()); }
466 /** Returns a lower-case version of the first character of this string. */
467 juce_wchar toLowerCase() const noexcept { return CharacterFunctions::toLowerCase (operator*()); }
469 /** Parses this string as a 32-bit integer. */
470 int getIntValue32() const noexcept { return atoi (data); }
472 /** Parses this string as a 64-bit integer. */
473 int64 getIntValue64() const noexcept
475 #if JUCE_WINDOWS && ! JUCE_MINGW
476 return _atoi64 (data);
477 #else
478 return atoll (data);
479 #endif
482 /** Parses this string as a floating point double. */
483 double getDoubleValue() const noexcept { return CharacterFunctions::getDoubleValue (*this); }
485 /** Returns the first non-whitespace character in the string. */
486 CharPointer_UTF8 findEndOfWhitespace() const noexcept { return CharacterFunctions::findEndOfWhitespace (*this); }
488 /** Move this pointer to the first non-whitespace character in the string. */
489 void incrementToEndOfWhitespace() noexcept { CharacterFunctions::incrementToEndOfWhitespace (*this); }
491 /** Returns true if the given unicode character can be represented in this encoding. */
492 static bool canRepresent (juce_wchar character) noexcept
494 return ((uint32) character) < (uint32) 0x10ffff;
497 /** Returns true if this data contains a valid string in this encoding. */
498 static bool isValidString (const CharType* dataToTest, int maxBytesToRead)
500 while (--maxBytesToRead >= 0 && *dataToTest != 0)
502 auto byte = (signed char) *dataToTest++;
504 if (byte < 0)
506 int bit = 0x40;
507 int numExtraValues = 0;
509 while ((byte & bit) != 0)
511 if (bit < 8)
512 return false;
514 ++numExtraValues;
515 bit >>= 1;
517 if (bit == 8 && (numExtraValues > maxBytesToRead
518 || *CharPointer_UTF8 (dataToTest - 1) > 0x10ffff))
519 return false;
522 if (numExtraValues == 0)
523 return false;
525 maxBytesToRead -= numExtraValues;
526 if (maxBytesToRead < 0)
527 return false;
529 while (--numExtraValues >= 0)
530 if ((*dataToTest++ & 0xc0) != 0x80)
531 return false;
535 return true;
538 /** Atomically swaps this pointer for a new value, returning the previous value. */
539 CharPointer_UTF8 atomicSwap (const CharPointer_UTF8 newValue)
541 return CharPointer_UTF8 (reinterpret_cast<Atomic<CharType*>&> (data).exchange (newValue.data));
544 /** These values are the byte-order mark (BOM) values for a UTF-8 stream. */
545 enum
547 byteOrderMark1 = 0xef,
548 byteOrderMark2 = 0xbb,
549 byteOrderMark3 = 0xbf
552 /** Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM).
553 The pointer must not be null, and must point to at least 3 valid bytes.
555 static bool isByteOrderMark (const void* possibleByteOrder) noexcept
557 JUCE_BEGIN_IGNORE_WARNINGS_MSVC (28182)
558 jassert (possibleByteOrder != nullptr);
559 auto c = static_cast<const uint8*> (possibleByteOrder);
561 return c[0] == (uint8) byteOrderMark1
562 && c[1] == (uint8) byteOrderMark2
563 && c[2] == (uint8) byteOrderMark3;
564 JUCE_END_IGNORE_WARNINGS_MSVC
567 private:
568 CharType* data;
571 } // namespace juce