juce/source/src/text/juce_CharPointer_UTF16.h

   1 /*
   2   ==============================================================================
   3
   4    This file is part of the JUCE library - "Jules' Utility Class Extensions"
   5    Copyright 2004-11 by Raw Material Software Ltd.
   6
   7   ------------------------------------------------------------------------------
   8
   9    JUCE can be redistributed and/or modified under the terms of the GNU General
  10    Public License (Version 2), as published by the Free Software Foundation.
  11    A copy of the license is included in the JUCE distribution, or can be found
  12    online at www.gnu.org/licenses.
  13
  14    JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  15    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  16    A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  17
  18   ------------------------------------------------------------------------------
  19
  20    To release a closed-source product which uses JUCE, commercial licenses are
  21    available: visit www.rawmaterialsoftware.com/juce for more information.
  22
  23   ==============================================================================
  24 */
  25
  26 #ifndef __JUCE_CHARPOINTER_UTF16_JUCEHEADER__
  27 #define __JUCE_CHARPOINTER_UTF16_JUCEHEADER__
  28
  29
  30 //==============================================================================
  31 /**
  32     Wraps a pointer to a null-terminated UTF-16 character string, and provides
  33     various methods to operate on the data.
  34     @see CharPointer_UTF8, CharPointer_UTF32
  35 */
  36 class CharPointer_UTF16
  37 {
  38 public:
  39    #if JUCE_NATIVE_WCHAR_IS_UTF16
  40     typedef wchar_t CharType;
  41    #else
  42     typedef int16 CharType;
  43    #endif
  44
  45     inline explicit CharPointer_UTF16 (const CharType* const rawPointer) noexcept
  46         : data (const_cast <CharType*> (rawPointer))
  47     {
  48     }
  49
  50     inline CharPointer_UTF16 (const CharPointer_UTF16& other) noexcept
  51         : data (other.data)
  52     {
  53     }
  54
  55     inline CharPointer_UTF16& operator= (const CharPointer_UTF16& other) noexcept
  56     {
  57         data = other.data;
  58         return *this;
  59     }
  60
  61     inline CharPointer_UTF16& operator= (const CharType* text) noexcept
  62     {
  63         data = const_cast <CharType*> (text);
  64         return *this;
  65     }
  66
  67     /** This is a pointer comparison, it doesn't compare the actual text. */
  68     inline bool operator== (const CharPointer_UTF16& other) const noexcept { return data == other.data; }
  69     inline bool operator!= (const CharPointer_UTF16& other) const noexcept { return data != other.data; }
  70     inline bool operator<= (const CharPointer_UTF16& other) const noexcept { return data <= other.data; }
  71     inline bool operator<  (const CharPointer_UTF16& other) const noexcept { return data <  other.data; }
  72     inline bool operator>= (const CharPointer_UTF16& other) const noexcept { return data >= other.data; }
  73     inline bool operator>  (const CharPointer_UTF16& other) const noexcept { return data >  other.data; }
  74
  75     /** Returns the address that this pointer is pointing to. */
  76     inline CharType* getAddress() const noexcept        { return data; }
  77
  78     /** Returns the address that this pointer is pointing to. */
  79     inline operator const CharType*() const noexcept    { return data; }
  80
  81     /** Returns true if this pointer is pointing to a null character. */
  82     inline bool isEmpty() const noexcept                { return *data == 0; }
  83
  84     /** Returns the unicode character that this pointer is pointing to. */
  85     juce_wchar operator*() const noexcept
  86     {
  87         uint32 n = (uint32) (uint16) *data;
  88
  89         if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) data[1]) >= 0xdc00)
  90             n = 0x10000 + (((n - 0xd800) << 10) | (((uint32) (uint16) data[1]) - 0xdc00));
  91
  92         return (juce_wchar) n;
  93     }
  94
  95     /** Moves this pointer along to the next character in the string. */
  96     CharPointer_UTF16& operator++() noexcept
  97     {
  98         const juce_wchar n = *data++;
  99
 100         if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) *data) >= 0xdc00)
 101             ++data;
 102
 103         return *this;
 104     }
 105
 106     /** Moves this pointer back to the previous character in the string. */
 107     CharPointer_UTF16& operator--() noexcept
 108     {
 109         const juce_wchar n = *--data;
 110
 111         if (n >= 0xdc00 && n <= 0xdfff)
 112             --data;
 113
 114         return *this;
 115     }
 116
 117     /** Returns the character that this pointer is currently pointing to, and then
 118         advances the pointer to point to the next character. */
 119     juce_wchar getAndAdvance() noexcept
 120     {
 121         uint32 n = (uint32) (uint16) *data++;
 122
 123         if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) *data) >= 0xdc00)
 124             n = 0x10000 + ((((n - 0xd800) << 10) | (((uint32) (uint16) *data++) - 0xdc00)));
 125
 126         return (juce_wchar) n;
 127     }
 128
 129     /** Moves this pointer along to the next character in the string. */
 130     CharPointer_UTF16 operator++ (int) noexcept
 131     {
 132         CharPointer_UTF16 temp (*this);
 133         ++*this;
 134         return temp;
 135     }
 136
 137     /** Moves this pointer forwards by the specified number of characters. */
 138     void operator+= (int numToSkip) noexcept
 139     {
 140         if (numToSkip < 0)
 141         {
 142             while (++numToSkip <= 0)
 143                 --*this;
 144         }
 145         else
 146         {
 147             while (--numToSkip >= 0)
 148                 ++*this;
 149         }
 150     }
 151
 152     /** Moves this pointer backwards by the specified number of characters. */
 153     void operator-= (int numToSkip) noexcept
 154     {
 155         operator+= (-numToSkip);
 156     }
 157
 158     /** Returns the character at a given character index from the start of the string. */
 159     juce_wchar operator[] (const int characterIndex) const noexcept
 160     {
 161         CharPointer_UTF16 p (*this);
 162         p += characterIndex;
 163         return *p;
 164     }
 165
 166     /** Returns a pointer which is moved forwards from this one by the specified number of characters. */
 167     CharPointer_UTF16 operator+ (const int numToSkip) const noexcept
 168     {
 169         CharPointer_UTF16 p (*this);
 170         p += numToSkip;
 171         return p;
 172     }
 173
 174     /** Returns a pointer which is moved backwards from this one by the specified number of characters. */
 175     CharPointer_UTF16 operator- (const int numToSkip) const noexcept
 176     {
 177         CharPointer_UTF16 p (*this);
 178         p += -numToSkip;
 179         return p;
 180     }
 181
 182     /** Writes a unicode character to this string, and advances this pointer to point to the next position. */
 183     void write (juce_wchar charToWrite) noexcept
 184     {
 185         if (charToWrite >= 0x10000)
 186         {
 187             charToWrite -= 0x10000;
 188             *data++ = (CharType) (0xd800 + (charToWrite >> 10));
 189             *data++ = (CharType) (0xdc00 + (charToWrite & 0x3ff));
 190         }
 191         else
 192         {
 193             *data++ = (CharType) charToWrite;
 194         }
 195     }
 196
 197     /** Writes a null character to this string (leaving the pointer's position unchanged). */
 198     inline void writeNull() const noexcept
 199     {
 200         *data = 0;
 201     }
 202
 203     /** Returns the number of characters in this string. */
 204     size_t length() const noexcept
 205     {
 206         const CharType* d = data;
 207         size_t count = 0;
 208
 209         for (;;)
 210         {
 211             const int n = *d++;
 212
 213             if (n >= 0xd800 && n <= 0xdfff)
 214             {
 215                 if (*d++ == 0)
 216                     break;
 217             }
 218             else if (n == 0)
 219                 break;
 220
 221             ++count;
 222         }
 223
 224         return count;
 225     }
 226
 227     /** Returns the number of characters in this string, or the given value, whichever is lower. */
 228     size_t lengthUpTo (const size_t maxCharsToCount) const noexcept
 229     {
 230         return CharacterFunctions::lengthUpTo (*this, maxCharsToCount);
 231     }
 232
 233     /** Returns the number of characters in this string, or up to the given end pointer, whichever is lower. */
 234     size_t lengthUpTo (const CharPointer_UTF16& end) const noexcept
 235     {
 236         return CharacterFunctions::lengthUpTo (*this, end);
 237     }
 238
 239     /** Returns the number of bytes that are used to represent this string.
 240         This includes the terminating null character.
 241     */
 242     size_t sizeInBytes() const noexcept
 243     {
 244         return sizeof (CharType) * (findNullIndex (data) + 1);
 245     }
 246
 247     /** Returns the number of bytes that would be needed to represent the given
 248         unicode character in this encoding format.
 249     */
 250     static size_t getBytesRequiredFor (const juce_wchar charToWrite) noexcept
 251     {
 252         return (charToWrite >= 0x10000) ? (sizeof (CharType) * 2) : sizeof (CharType);
 253     }
 254
 255     /** Returns the number of bytes that would be needed to represent the given
 256         string in this encoding format.
 257         The value returned does NOT include the terminating null character.
 258     */
 259     template <class CharPointer>
 260     static size_t getBytesRequiredFor (CharPointer text) noexcept
 261     {
 262         size_t count = 0;
 263         juce_wchar n;
 264
 265         while ((n = text.getAndAdvance()) != 0)
 266             count += getBytesRequiredFor (n);
 267
 268         return count;
 269     }
 270
 271     /** Returns a pointer to the null character that terminates this string. */
 272     CharPointer_UTF16 findTerminatingNull() const noexcept
 273     {
 274         const CharType* t = data;
 275
 276         while (*t != 0)
 277             ++t;
 278
 279         return CharPointer_UTF16 (t);
 280     }
 281
 282     /** Copies a source string to this pointer, advancing this pointer as it goes. */
 283     template <typename CharPointer>
 284     void writeAll (const CharPointer& src) noexcept
 285     {
 286         CharacterFunctions::copyAll (*this, src);
 287     }
 288
 289     /** Copies a source string to this pointer, advancing this pointer as it goes. */
 290     void writeAll (const CharPointer_UTF16& src) noexcept
 291     {
 292         const CharType* s = src.data;
 293
 294         while ((*data = *s) != 0)
 295         {
 296             ++data;
 297             ++s;
 298         }
 299     }
 300
 301     /** Copies a source string to this pointer, advancing this pointer as it goes.
 302         The maxDestBytes parameter specifies the maximum number of bytes that can be written
 303         to the destination buffer before stopping.
 304     */
 305     template <typename CharPointer>
 306     int writeWithDestByteLimit (const CharPointer& src, const int maxDestBytes) noexcept
 307     {
 308         return CharacterFunctions::copyWithDestByteLimit (*this, src, maxDestBytes);
 309     }
 310
 311     /** Copies a source string to this pointer, advancing this pointer as it goes.
 312         The maxChars parameter specifies the maximum number of characters that can be
 313         written to the destination buffer before stopping (including the terminating null).
 314     */
 315     template <typename CharPointer>
 316     void writeWithCharLimit (const CharPointer& src, const int maxChars) noexcept
 317     {
 318         CharacterFunctions::copyWithCharLimit (*this, src, maxChars);
 319     }
 320
 321     /** Compares this string with another one. */
 322     template <typename CharPointer>
 323     int compare (const CharPointer& other) const noexcept
 324     {
 325         return CharacterFunctions::compare (*this, other);
 326     }
 327
 328     /** Compares this string with another one, up to a specified number of characters. */
 329     template <typename CharPointer>
 330     int compareUpTo (const CharPointer& other, const int maxChars) const noexcept
 331     {
 332         return CharacterFunctions::compareUpTo (*this, other, maxChars);
 333     }
 334
 335     /** Compares this string with another one. */
 336     template <typename CharPointer>
 337     int compareIgnoreCase (const CharPointer& other) const noexcept
 338     {
 339         return CharacterFunctions::compareIgnoreCase (*this, other);
 340     }
 341
 342     /** Compares this string with another one, up to a specified number of characters. */
 343     template <typename CharPointer>
 344     int compareIgnoreCaseUpTo (const CharPointer& other, const int maxChars) const noexcept
 345     {
 346         return CharacterFunctions::compareIgnoreCaseUpTo (*this, other, maxChars);
 347     }
 348
 349    #if JUCE_WINDOWS && ! DOXYGEN
 350     int compareIgnoreCase (const CharPointer_UTF16& other) const noexcept
 351     {
 352         return _wcsicmp (data, other.data);
 353     }
 354
 355     int compareIgnoreCaseUpTo (const CharPointer_UTF16& other, int maxChars) const noexcept
 356     {
 357         return _wcsnicmp (data, other.data, maxChars);
 358     }
 359
 360     int indexOf (const CharPointer_UTF16& stringToFind) const noexcept
 361     {
 362         const CharType* const t = wcsstr (data, stringToFind.getAddress());
 363         return t == nullptr ? -1 : (int) (t - data);
 364     }
 365    #endif
 366
 367     /** Returns the character index of a substring, or -1 if it isn't found. */
 368     template <typename CharPointer>
 369     int indexOf (const CharPointer& stringToFind) const noexcept
 370     {
 371         return CharacterFunctions::indexOf (*this, stringToFind);
 372     }
 373
 374     /** Returns the character index of a unicode character, or -1 if it isn't found. */
 375     int indexOf (const juce_wchar charToFind) const noexcept
 376     {
 377         return CharacterFunctions::indexOfChar (*this, charToFind);
 378     }
 379
 380     /** Returns the character index of a unicode character, or -1 if it isn't found. */
 381     int indexOf (const juce_wchar charToFind, const bool ignoreCase) const noexcept
 382     {
 383         return ignoreCase ? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind)
 384                           : CharacterFunctions::indexOfChar (*this, charToFind);
 385     }
 386
 387     /** Returns true if the first character of this string is whitespace. */
 388     bool isWhitespace() const noexcept      { return CharacterFunctions::isWhitespace (operator*()) != 0; }
 389     /** Returns true if the first character of this string is a digit. */
 390     bool isDigit() const noexcept           { return CharacterFunctions::isDigit (operator*()) != 0; }
 391     /** Returns true if the first character of this string is a letter. */
 392     bool isLetter() const noexcept          { return CharacterFunctions::isLetter (operator*()) != 0; }
 393     /** Returns true if the first character of this string is a letter or digit. */
 394     bool isLetterOrDigit() const noexcept   { return CharacterFunctions::isLetterOrDigit (operator*()) != 0; }
 395     /** Returns true if the first character of this string is upper-case. */
 396     bool isUpperCase() const noexcept       { return CharacterFunctions::isUpperCase (operator*()) != 0; }
 397     /** Returns true if the first character of this string is lower-case. */
 398     bool isLowerCase() const noexcept       { return CharacterFunctions::isLowerCase (operator*()) != 0; }
 399
 400     /** Returns an upper-case version of the first character of this string. */
 401     juce_wchar toUpperCase() const noexcept { return CharacterFunctions::toUpperCase (operator*()); }
 402     /** Returns a lower-case version of the first character of this string. */
 403     juce_wchar toLowerCase() const noexcept { return CharacterFunctions::toLowerCase (operator*()); }
 404
 405     /** Parses this string as a 32-bit integer. */
 406     int getIntValue32() const noexcept
 407     {
 408        #if JUCE_WINDOWS
 409         return _wtoi (data);
 410        #else
 411         return CharacterFunctions::getIntValue <int, CharPointer_UTF16> (*this);
 412        #endif
 413     }
 414
 415     /** Parses this string as a 64-bit integer. */
 416     int64 getIntValue64() const noexcept
 417     {
 418        #if JUCE_WINDOWS
 419         return _wtoi64 (data);
 420        #else
 421         return CharacterFunctions::getIntValue <int64, CharPointer_UTF16> (*this);
 422        #endif
 423     }
 424
 425     /** Parses this string as a floating point double. */
 426     double getDoubleValue() const noexcept  { return CharacterFunctions::getDoubleValue (*this); }
 427
 428     /** Returns the first non-whitespace character in the string. */
 429     CharPointer_UTF16 findEndOfWhitespace() const noexcept   { return CharacterFunctions::findEndOfWhitespace (*this); }
 430
 431     /** Returns true if the given unicode character can be represented in this encoding. */
 432     static bool canRepresent (juce_wchar character) noexcept
 433     {
 434         return ((unsigned int) character) < (unsigned int) 0x10ffff
 435                  && (((unsigned int) character) < 0xd800 || ((unsigned int) character) > 0xdfff);
 436     }
 437
 438     /** Returns true if this data contains a valid string in this encoding. */
 439     static bool isValidString (const CharType* dataToTest, int maxBytesToRead)
 440     {
 441         maxBytesToRead /= sizeof (CharType);
 442
 443         while (--maxBytesToRead >= 0 && *dataToTest != 0)
 444         {
 445             const uint32 n = (uint32) (uint16) *dataToTest++;
 446
 447             if (n >= 0xd800)
 448             {
 449                 if (n > 0x10ffff)
 450                     return false;
 451
 452                 if (n <= 0xdfff)
 453                 {
 454                     if (n > 0xdc00)
 455                         return false;
 456
 457                     const uint32 nextChar = (uint32) (uint16) *dataToTest++;
 458
 459                     if (nextChar < 0xdc00 || nextChar > 0xdfff)
 460                         return false;
 461                 }
 462             }
 463         }
 464
 465         return true;
 466     }
 467
 468     /** Atomically swaps this pointer for a new value, returning the previous value. */
 469     CharPointer_UTF16 atomicSwap (const CharPointer_UTF16& newValue)
 470     {
 471         return CharPointer_UTF16 (reinterpret_cast <Atomic<CharType*>&> (data).exchange (newValue.data));
 472     }
 473
 474     /** These values are the byte-order-mark (BOM) values for a UTF-16 stream. */
 475     enum
 476     {
 477         byteOrderMarkBE1 = 0xfe,
 478         byteOrderMarkBE2 = 0xff,
 479         byteOrderMarkLE1 = 0xff,
 480         byteOrderMarkLE2 = 0xfe
 481     };
 482
 483 private:
 484     CharType* data;
 485
 486     static int findNullIndex (const CharType* const t) noexcept
 487     {
 488         int n = 0;
 489
 490         while (t[n] != 0)
 491             ++n;
 492
 493         return n;
 494     }
 495 };
 496
 497
 498 #endif   // __JUCE_CHARPOINTER_UTF16_JUCEHEADER__