Cleanup
[carla.git] / source / modules / water / text / CharPointer_UTF8.h
blob6f9e96cc7fee9ba1fab79ef6f054d1b299b7a932
/*
  ==============================================================================

   This file is part of the Water library.
   Copyright (c) 2016 ROLI Ltd.
   Copyright (C) 2017 Filipe Coelho <falktx@falktx.com>

   Permission is granted to use this software under the terms of the ISC license
   http://www.isc.org/downloads/software-support-policy/isc-license/

   Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted, provided that the above
   copyright notice and this permission notice appear in all copies.

   THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
   TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
   OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
   USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
   TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
   OF THIS SOFTWARE.

  ==============================================================================
*/
26 #ifndef WATER_CHARPOINTER_UTF8_H_INCLUDED
27 #define WATER_CHARPOINTER_UTF8_H_INCLUDED
29 #include "CharacterFunctions.h"
30 #include "../memory/Atomic.h"
32 #include "CarlaUtils.hpp"
34 namespace water {
36 class String;
38 //==============================================================================
39 /**
40 Wraps a pointer to a null-terminated UTF-8 character string, and provides
41 various methods to operate on the data.
42 @see CharPointer_UTF16, CharPointer_UTF32
44 class CharPointer_UTF8
46 public:
47 typedef char CharType;
49 inline explicit CharPointer_UTF8 (const CharType* const rawPointer) noexcept
50 : data (const_cast<CharType*> (rawPointer))
54 inline CharPointer_UTF8 (const CharPointer_UTF8& other) noexcept
55 : data (other.data)
59 inline CharPointer_UTF8& operator= (CharPointer_UTF8 other) noexcept
61 data = other.data;
62 return *this;
65 inline CharPointer_UTF8& operator= (const CharType* text) noexcept
67 data = const_cast<CharType*> (text);
68 return *this;
71 /** This is a pointer comparison, it doesn't compare the actual text. */
72 inline bool operator== (CharPointer_UTF8 other) const noexcept { return data == other.data; }
73 inline bool operator!= (CharPointer_UTF8 other) const noexcept { return data != other.data; }
74 inline bool operator<= (CharPointer_UTF8 other) const noexcept { return data <= other.data; }
75 inline bool operator< (CharPointer_UTF8 other) const noexcept { return data < other.data; }
76 inline bool operator>= (CharPointer_UTF8 other) const noexcept { return data >= other.data; }
77 inline bool operator> (CharPointer_UTF8 other) const noexcept { return data > other.data; }
79 /** Returns the address that this pointer is pointing to. */
80 inline CharType* getAddress() const noexcept { return data; }
82 /** Returns the address that this pointer is pointing to. */
83 inline operator const CharType*() const noexcept { return data; }
85 /** Returns true if this pointer is pointing to a null character. */
86 inline bool isEmpty() const noexcept { return *data == 0; }
88 /** Returns the unicode character that this pointer is pointing to. */
89 water_uchar operator*() const noexcept
91 const signed char byte = (signed char) *data;
93 if (byte >= 0)
94 return (water_uchar) (uint8) byte;
96 uint32 n = (uint32) (uint8) byte;
97 uint32 mask = 0x7f;
98 uint32 bit = 0x40;
99 int numExtraValues = 0;
101 while ((n & bit) != 0 && bit > 0x8)
103 mask >>= 1;
104 ++numExtraValues;
105 bit >>= 1;
108 n &= mask;
110 for (int i = 1; i <= numExtraValues; ++i)
112 const uint32 nextByte = (uint32) (uint8) data[i];
114 if ((nextByte & 0xc0) != 0x80)
115 break;
117 n <<= 6;
118 n |= (nextByte & 0x3f);
121 return (water_uchar) n;
124 /** Moves this pointer along to the next character in the string. */
125 CharPointer_UTF8& operator++() noexcept
127 wassert (*data != 0); // trying to advance past the end of the string?
128 const signed char n = (signed char) *data++;
130 if (n < 0)
132 water_uchar bit = 0x40;
134 while ((static_cast<unsigned char>(n) & bit) != 0 && bit > 0x8)
136 ++data;
137 bit >>= 1;
141 return *this;
144 /** Moves this pointer back to the previous character in the string. */
145 CharPointer_UTF8& operator--() noexcept
147 int count = 0;
149 while ((*--data & 0xc0) == 0x80 && ++count < 4)
152 return *this;
155 /** Returns the character that this pointer is currently pointing to, and then
156 advances the pointer to point to the next character. */
157 water_uchar getAndAdvance() noexcept
159 const signed char byte = (signed char) *data++;
161 if (byte >= 0)
162 return (water_uchar) (uint8) byte;
164 uint32 n = (uint32) (uint8) byte;
165 uint32 mask = 0x7f;
166 uint32 bit = 0x40;
167 int numExtraValues = 0;
169 while ((n & bit) != 0 && bit > 0x8)
171 mask >>= 1;
172 ++numExtraValues;
173 bit >>= 1;
176 n &= mask;
178 while (--numExtraValues >= 0)
180 const uint32 nextByte = (uint32) (uint8) *data;
182 if ((nextByte & 0xc0) != 0x80)
183 break;
185 ++data;
186 n <<= 6;
187 n |= (nextByte & 0x3f);
190 return (water_uchar) n;
193 /** Moves this pointer along to the next character in the string. */
194 CharPointer_UTF8 operator++ (int) noexcept
196 CharPointer_UTF8 temp (*this);
197 ++*this;
198 return temp;
201 /** Moves this pointer forwards by the specified number of characters. */
202 void operator+= (int numToSkip) noexcept
204 if (numToSkip < 0)
206 while (++numToSkip <= 0)
207 --*this;
209 else
211 while (--numToSkip >= 0)
212 ++*this;
216 /** Moves this pointer backwards by the specified number of characters. */
217 void operator-= (int numToSkip) noexcept
219 operator+= (-numToSkip);
222 /** Returns the character at a given character index from the start of the string. */
223 water_uchar operator[] (int characterIndex) const noexcept
225 CharPointer_UTF8 p (*this);
226 p += characterIndex;
227 return *p;
230 /** Returns a pointer which is moved forwards from this one by the specified number of characters. */
231 CharPointer_UTF8 operator+ (int numToSkip) const noexcept
233 CharPointer_UTF8 p (*this);
234 p += numToSkip;
235 return p;
238 /** Returns a pointer which is moved backwards from this one by the specified number of characters. */
239 CharPointer_UTF8 operator- (int numToSkip) const noexcept
241 CharPointer_UTF8 p (*this);
242 p += -numToSkip;
243 return p;
246 /** Returns the number of characters in this string. */
247 size_t length() const noexcept
249 const CharType* d = data;
250 size_t count = 0;
252 for (;;)
254 const uint32 n = (uint32) (uint8) *d++;
256 if ((n & 0x80) != 0)
258 while ((*d & 0xc0) == 0x80)
259 ++d;
261 else if (n == 0)
262 break;
264 ++count;
267 return count;
270 /** Returns the number of characters in this string, or the given value, whichever is lower. */
271 size_t lengthUpTo (const size_t maxCharsToCount) const noexcept
273 return CharacterFunctions::lengthUpTo (*this, maxCharsToCount);
276 /** Returns the number of characters in this string, or up to the given end pointer, whichever is lower. */
277 size_t lengthUpTo (const CharPointer_UTF8 end) const noexcept
279 return CharacterFunctions::lengthUpTo (*this, end);
282 /** Returns the number of bytes that are used to represent this string.
283 This includes the terminating null character.
285 size_t sizeInBytes() const noexcept
287 wassert (data != nullptr);
288 return strlen (data) + 1;
291 /** Returns the number of bytes that would be needed to represent the given
292 unicode character in this encoding format.
294 static size_t getBytesRequiredFor (const water_uchar charToWrite) noexcept
296 size_t num = 1;
297 const uint32 c = (uint32) charToWrite;
299 if (c >= 0x80)
301 ++num;
302 if (c >= 0x800)
304 ++num;
305 if (c >= 0x10000)
306 ++num;
310 return num;
313 /** Returns the number of bytes that would be needed to represent the given
314 string in this encoding format.
315 The value returned does NOT include the terminating null character.
317 template <class CharPointer>
318 static size_t getBytesRequiredFor (CharPointer text) noexcept
320 size_t count = 0;
322 while (water_uchar n = text.getAndAdvance())
323 count += getBytesRequiredFor (n);
325 return count;
328 /** Returns a pointer to the null character that terminates this string. */
329 CharPointer_UTF8 findTerminatingNull() const noexcept
331 return CharPointer_UTF8 (data + strlen (data));
334 /** Writes a unicode character to this string, and advances this pointer to point to the next position. */
335 void write (const water_uchar charToWrite) noexcept
337 const uint32 c = (uint32) charToWrite;
339 if (c >= 0x80)
341 int numExtraBytes = 1;
342 if (c >= 0x800)
344 ++numExtraBytes;
345 if (c >= 0x10000)
346 ++numExtraBytes;
349 *data++ = (CharType) ((uint32) (0xff << (7 - numExtraBytes)) | (c >> (numExtraBytes * 6)));
351 while (--numExtraBytes >= 0)
352 *data++ = (CharType) (0x80 | (0x3f & (c >> (numExtraBytes * 6))));
354 else
356 *data++ = (CharType) c;
360 /** Writes a null character to this string (leaving the pointer's position unchanged). */
361 inline void writeNull() const noexcept
363 *data = 0;
366 /** Copies a source string to this pointer, advancing this pointer as it goes. */
367 template <typename CharPointer>
368 void writeAll (const CharPointer src) noexcept
370 CharacterFunctions::copyAll (*this, src);
373 /** Copies a source string to this pointer, advancing this pointer as it goes. */
374 void writeAll (const CharPointer_UTF8 src) noexcept
376 const CharType* s = src.data;
378 while ((*data = *s) != 0)
380 ++data;
381 ++s;
385 /** Copies a source string to this pointer, advancing this pointer as it goes.
386 The maxDestBytes parameter specifies the maximum number of bytes that can be written
387 to the destination buffer before stopping.
389 template <typename CharPointer>
390 size_t writeWithDestByteLimit (const CharPointer src, const size_t maxDestBytes) noexcept
392 return CharacterFunctions::copyWithDestByteLimit (*this, src, maxDestBytes);
395 /** Copies a source string to this pointer, advancing this pointer as it goes.
396 The maxChars parameter specifies the maximum number of characters that can be
397 written to the destination buffer before stopping (including the terminating null).
399 template <typename CharPointer>
400 void writeWithCharLimit (const CharPointer src, const int maxChars) noexcept
402 CharacterFunctions::copyWithCharLimit (*this, src, maxChars);
405 /** Compares this string with another one. */
406 template <typename CharPointer>
407 int compare (const CharPointer other) const noexcept
409 return CharacterFunctions::compare (*this, other);
412 /** Compares this string with another one, up to a specified number of characters. */
413 template <typename CharPointer>
414 int compareUpTo (const CharPointer other, const int maxChars) const noexcept
416 return CharacterFunctions::compareUpTo (*this, other, maxChars);
419 /** Compares this string with another one. */
420 template <typename CharPointer>
421 int compareIgnoreCase (const CharPointer other) const noexcept
423 return CharacterFunctions::compareIgnoreCase (*this, other);
426 /** Compares this string with another one. */
427 int compareIgnoreCase (const CharPointer_UTF8 other) const noexcept
429 return CharacterFunctions::compareIgnoreCase (*this, other);
432 /** Compares this string with another one, up to a specified number of characters. */
433 template <typename CharPointer>
434 int compareIgnoreCaseUpTo (const CharPointer other, const int maxChars) const noexcept
436 return CharacterFunctions::compareIgnoreCaseUpTo (*this, other, maxChars);
439 /** Returns the character index of a substring, or -1 if it isn't found. */
440 template <typename CharPointer>
441 int indexOf (const CharPointer stringToFind) const noexcept
443 return CharacterFunctions::indexOf (*this, stringToFind);
446 /** Returns the character index of a unicode character, or -1 if it isn't found. */
447 int indexOf (const water_uchar charToFind) const noexcept
449 return CharacterFunctions::indexOfChar (*this, charToFind);
452 /** Returns the character index of a unicode character, or -1 if it isn't found. */
453 int indexOf (const water_uchar charToFind, const bool ignoreCase) const noexcept
455 return ignoreCase ? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind)
456 : CharacterFunctions::indexOfChar (*this, charToFind);
459 /** Returns true if the first character of this string is whitespace. */
460 bool isWhitespace() const noexcept { const CharType c = *data; return c == ' ' || (c <= 13 && c >= 9); }
461 /** Returns true if the first character of this string is a digit. */
462 bool isDigit() const noexcept { const CharType c = *data; return c >= '0' && c <= '9'; }
463 /** Returns true if the first character of this string is a letter. */
464 bool isLetter() const noexcept { return CharacterFunctions::isLetter (operator*()) != 0; }
465 /** Returns true if the first character of this string is a letter or digit. */
466 bool isLetterOrDigit() const noexcept { return CharacterFunctions::isLetterOrDigit (operator*()) != 0; }
467 /** Returns true if the first character of this string is upper-case. */
468 bool isUpperCase() const noexcept { return CharacterFunctions::isUpperCase (operator*()) != 0; }
469 /** Returns true if the first character of this string is lower-case. */
470 bool isLowerCase() const noexcept { return CharacterFunctions::isLowerCase (operator*()) != 0; }
472 /** Returns an upper-case version of the first character of this string. */
473 water_uchar toUpperCase() const noexcept { return CharacterFunctions::toUpperCase (operator*()); }
474 /** Returns a lower-case version of the first character of this string. */
475 water_uchar toLowerCase() const noexcept { return CharacterFunctions::toLowerCase (operator*()); }
477 /** Parses this string as a 32-bit integer. */
478 int getIntValue32() const noexcept { return atoi (data); }
480 /** Parses this string as a 64-bit integer. */
481 int64 getIntValue64() const noexcept
483 return atoll (data);
484 #if 0
485 return CharacterFunctions::getIntValue <int64, CharPointer_UTF8> (*this);
486 #endif
489 /** Parses this string as a floating point double. */
490 double getDoubleValue() const noexcept { return CharacterFunctions::getDoubleValue (*this); }
492 /** Returns the first non-whitespace character in the string. */
493 CharPointer_UTF8 findEndOfWhitespace() const noexcept { return CharacterFunctions::findEndOfWhitespace (*this); }
495 /** Returns true if the given unicode character can be represented in this encoding. */
496 static bool canRepresent (water_uchar character) noexcept
498 return ((unsigned int) character) < (unsigned int) 0x10ffff;
501 /** Returns true if this data contains a valid string in this encoding. */
502 static bool isValidString (const CharType* dataToTest, int maxBytesToRead)
504 while (--maxBytesToRead >= 0 && *dataToTest != 0)
506 const signed char byte = (signed char) *dataToTest++;
508 if (byte < 0)
510 int bit = 0x40;
511 int numExtraValues = 0;
513 while ((byte & bit) != 0)
515 if (bit < 8)
516 return false;
518 ++numExtraValues;
519 bit >>= 1;
521 if (bit == 8 && (numExtraValues > maxBytesToRead
522 || *CharPointer_UTF8 (dataToTest - 1) > 0x10ffff))
523 return false;
526 if (numExtraValues == 0)
527 return false;
529 maxBytesToRead -= numExtraValues;
530 if (maxBytesToRead < 0)
531 return false;
533 while (--numExtraValues >= 0)
534 if ((*dataToTest++ & 0xc0) != 0x80)
535 return false;
539 return true;
542 /** These values are the byte-order mark (BOM) values for a UTF-8 stream. */
543 enum
545 byteOrderMark1 = 0xef,
546 byteOrderMark2 = 0xbb,
547 byteOrderMark3 = 0xbf
550 /** Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM).
551 The pointer must not be null, and must point to at least 3 valid bytes.
553 static bool isByteOrderMark (const void* possibleByteOrder) noexcept
555 wassert (possibleByteOrder != nullptr);
556 const uint8* const c = static_cast<const uint8*> (possibleByteOrder);
558 return c[0] == (uint8) byteOrderMark1
559 && c[1] == (uint8) byteOrderMark2
560 && c[2] == (uint8) byteOrderMark3;
563 private:
564 CharType* data;
566 friend class String;
571 #endif // WATER_CHARPOINTER_UTF8_H_INCLUDED