xpcom/string/src/nsReadableUtils.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is mozilla.org code.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Netscape Communications Corporation.
  19  * Portions created by the Initial Developer are Copyright (C) 2000
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *   Scott Collins <scc@mozilla.org> (original author)
  24  *
  25  * Alternatively, the contents of this file may be used under the terms of
  26  * either of the GNU General Public License Version 2 or later (the "GPL"),
  27  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  28  * in which case the provisions of the GPL or the LGPL are applicable instead
  29  * of those above. If you wish to allow use of your version of this file only
  30  * under the terms of either the GPL or the LGPL, and not to allow others to
  31  * use your version of this file under the terms of the MPL, indicate your
  32  * decision by deleting the provisions above and replace them with the notice
  33  * and other provisions required by the GPL or the LGPL. If you do not delete
  34  * the provisions above, a recipient may use your version of this file under
  35  * the terms of any one of the MPL, the GPL or the LGPL.
  36  *
  37  * ***** END LICENSE BLOCK ***** */
  38
  39 #include "nsReadableUtils.h"
  40 #include "nsMemory.h"
  41 #include "nsString.h"
  42 #include "nsUTF8Utils.h"
  43
  44 NS_COM
  45 void
  46 LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest )
  47   {
  48     aDest.Truncate();
  49     LossyAppendUTF16toASCII(aSource, aDest);
  50   }
  51
  52 NS_COM
  53 void
  54 CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
  55   {
  56     aDest.Truncate();
  57     AppendASCIItoUTF16(aSource, aDest);
  58   }
  59
  60 NS_COM
  61 void
  62 LossyCopyUTF16toASCII( const PRUnichar* aSource, nsACString& aDest )
  63   {
  64     aDest.Truncate();
  65     if (aSource) {
  66       LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
  67     }
  68   }
  69
  70 NS_COM
  71 void
  72 CopyASCIItoUTF16( const char* aSource, nsAString& aDest )
  73   {
  74     aDest.Truncate();
  75     if (aSource) {
  76       AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
  77     }
  78   }
  79
  80 NS_COM
  81 void
  82 CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
  83   {
  84     aDest.Truncate();
  85     AppendUTF16toUTF8(aSource, aDest);
  86   }
  87
  88 NS_COM
  89 void
  90 CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
  91   {
  92     aDest.Truncate();
  93     AppendUTF8toUTF16(aSource, aDest);
  94   }
  95
  96 NS_COM
  97 void
  98 CopyUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest )
  99   {
 100     aDest.Truncate();
 101     AppendUTF16toUTF8(aSource, aDest);
 102   }
 103
 104 NS_COM
 105 void
 106 CopyUTF8toUTF16( const char* aSource, nsAString& aDest )
 107   {
 108     aDest.Truncate();
 109     AppendUTF8toUTF16(aSource, aDest);
 110   }
 111
 112 // Like GetMutableData, but returns false if it can't
 113 // allocate enough memory (e.g. due to OOM) rather than
 114 // returning zero (which could have other meanings) and
 115 // throws away the out-param pointer.
 116 PRBool
 117 SetLengthForWriting(nsAString& aDest, PRUint32 aDesiredLength)
 118   {
 119     PRUnichar* dummy;
 120     PRUint32 len = aDest.GetMutableData(&dummy, aDesiredLength);
 121     return (len >= aDesiredLength);
 122   }
 123
 124 PRBool
 125 SetLengthForWritingC(nsACString& aDest, PRUint32 aDesiredLength)
 126   {
 127     char* dummy;
 128     PRUint32 len = aDest.GetMutableData(&dummy, aDesiredLength);
 129     return (len >= aDesiredLength);
 130   }
 131
 132
 133 NS_COM
 134 void
 135 LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest )
 136   {
 137     PRUint32 old_dest_length = aDest.Length();
 138     if (!SetLengthForWritingC(aDest, old_dest_length + aSource.Length()))
 139         return;
 140
 141     nsAString::const_iterator fromBegin, fromEnd;
 142
 143     nsACString::iterator dest;
 144     aDest.BeginWriting(dest);
 145
 146     dest.advance(old_dest_length);
 147
 148       // right now, this won't work on multi-fragment destinations
 149     LossyConvertEncoding<PRUnichar, char> converter(dest.get());
 150
 151     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
 152   }
 153
 154 NS_COM
 155 void
 156 AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
 157   {
 158     PRUint32 old_dest_length = aDest.Length();
 159     if (!SetLengthForWriting(aDest, old_dest_length + aSource.Length()))
 160         return;
 161
 162     nsACString::const_iterator fromBegin, fromEnd;
 163
 164     nsAString::iterator dest;
 165     aDest.BeginWriting(dest);
 166
 167     dest.advance(old_dest_length);
 168
 169       // right now, this won't work on multi-fragment destinations
 170     LossyConvertEncoding<char, PRUnichar> converter(dest.get());
 171
 172     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
 173   }
 174
 175 NS_COM
 176 void
 177 LossyAppendUTF16toASCII( const PRUnichar* aSource, nsACString& aDest )
 178   {
 179     if (aSource) {
 180       LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
 181     }
 182   }
 183
 184 NS_COM
 185 void
 186 AppendASCIItoUTF16( const char* aSource, nsAString& aDest )
 187   {
 188     if (aSource) {
 189       AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
 190     }
 191   }
 192
 193 NS_COM
 194 void
 195 AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest )
 196   {
 197     nsAString::const_iterator source_start, source_end;
 198     CalculateUTF8Size calculator;
 199     copy_string(aSource.BeginReading(source_start),
 200                 aSource.EndReading(source_end), calculator);
 201
 202     PRUint32 count = calculator.Size();
 203
 204     if (count)
 205       {
 206         PRUint32 old_dest_length = aDest.Length();
 207
 208         // Grow the buffer if we need to.
 209         if(!SetLengthForWritingC(aDest, old_dest_length + count))
 210             return;
 211
 212         nsACString::iterator dest;
 213         aDest.BeginWriting(dest);
 214
 215         dest.advance(old_dest_length);
 216
 217         if (count <= (PRUint32)dest.size_forward())
 218           {
 219             // aDest has enough room in the fragment just past the end
 220             // of its old data that it can hold what we're about to
 221             // append. Append using copy_string().
 222
 223             // All ready? Time to convert
 224
 225             ConvertUTF16toUTF8 converter(dest.get());
 226             copy_string(aSource.BeginReading(source_start),
 227                         aSource.EndReading(source_end), converter);
 228
 229             if (converter.Size() != count)
 230               {
 231                 NS_ERROR("Input invalid or incorrect length was calculated");
 232
 233                 aDest.SetLength(old_dest_length);
 234               }
 235           }
 236         else
 237           {
 238             // This isn't the fastest way to do this, but it gets
 239             // complicated to convert UTF16 into a fragmented UTF8
 240             // string, so we'll take the easy way out here in this
 241             // rare situation.
 242
 243             aDest.Replace(old_dest_length, count,
 244                           NS_ConvertUTF16toUTF8(aSource));
 245           }
 246       }
 247   }
 248
 249 NS_COM
 250 void
 251 AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest )
 252   {
 253     nsACString::const_iterator source_start, source_end;
 254     CalculateUTF8Length calculator;
 255     copy_string(aSource.BeginReading(source_start),
 256                 aSource.EndReading(source_end), calculator);
 257
 258     PRUint32 count = calculator.Length();
 259
 260     if (count)
 261       {
 262         PRUint32 old_dest_length = aDest.Length();
 263
 264         // Grow the buffer if we need to.
 265         if(!SetLengthForWriting(aDest, old_dest_length + count))
 266             return;
 267
 268         nsAString::iterator dest;
 269         aDest.BeginWriting(dest);
 270
 271         dest.advance(old_dest_length);
 272
 273         if (count <= (PRUint32)dest.size_forward())
 274           {
 275             // aDest has enough room in the fragment just past the end
 276             // of its old data that it can hold what we're about to
 277             // append. Append using copy_string().
 278
 279             // All ready? Time to convert
 280
 281             ConvertUTF8toUTF16 converter(dest.get());
 282             copy_string(aSource.BeginReading(source_start),
 283                         aSource.EndReading(source_end), converter);
 284
 285             if (converter.Length() != count)
 286               {
 287                 NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
 288                 aDest.SetLength(old_dest_length);
 289               }
 290           }
 291         else
 292           {
 293             // This isn't the fastest way to do this, but it gets
 294             // complicated to convert parts of a UTF8 string into a
 295             // UTF16 string, so we'll take the easy way out here in
 296             // this rare situation.
 297
 298             aDest.Replace(old_dest_length, count,
 299                           NS_ConvertUTF8toUTF16(aSource));
 300           }
 301       }
 302   }
 303
 304 NS_COM
 305 void
 306 AppendUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest )
 307   {
 308     if (aSource) {
 309       AppendUTF16toUTF8(nsDependentString(aSource), aDest);
 310     }
 311   }
 312
 313 NS_COM
 314 void
 315 AppendUTF8toUTF16( const char* aSource, nsAString& aDest )
 316   {
 317     if (aSource) {
 318       AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
 319     }
 320   }
 321
 322
 323   /**
 324    * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
 325    *
 326    * @param aSource an string you will eventually be making a copy of
 327    * @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
 328    *
 329    */
 330 template <class FromStringT, class ToCharT>
 331 inline
 332 ToCharT*
 333 AllocateStringCopy( const FromStringT& aSource, ToCharT* )
 334   {
 335     return static_cast<ToCharT*>(nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT)));
 336   }
 337
 338
 339 NS_COM
 340 char*
 341 ToNewCString( const nsAString& aSource )
 342   {
 343     char* result = AllocateStringCopy(aSource, (char*)0);
 344     if (!result)
 345       return nsnull;
 346
 347     nsAString::const_iterator fromBegin, fromEnd;
 348     LossyConvertEncoding<PRUnichar, char> converter(result);
 349     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
 350     return result;
 351   }
 352
 353 NS_COM
 354 char*
 355 ToNewUTF8String( const nsAString& aSource, PRUint32 *aUTF8Count )
 356   {
 357     nsAString::const_iterator start, end;
 358     CalculateUTF8Size calculator;
 359     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
 360                 calculator);
 361
 362     if (aUTF8Count)
 363       *aUTF8Count = calculator.Size();
 364
 365     char *result = static_cast<char*>
 366                               (nsMemory::Alloc(calculator.Size() + 1));
 367     if (!result)
 368       return nsnull;
 369
 370     ConvertUTF16toUTF8 converter(result);
 371     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
 372                 converter).write_terminator();
 373     NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
 374
 375     return result;
 376   }
 377
 378 NS_COM
 379 char*
 380 ToNewCString( const nsACString& aSource )
 381   {
 382     // no conversion needed, just allocate a buffer of the correct length and copy into it
 383
 384     char* result = AllocateStringCopy(aSource, (char*)0);
 385     if (!result)
 386       return nsnull;
 387
 388     nsACString::const_iterator fromBegin, fromEnd;
 389     char* toBegin = result;
 390     *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0);
 391     return result;
 392   }
 393
 394 NS_COM
 395 PRUnichar*
 396 ToNewUnicode( const nsAString& aSource )
 397   {
 398     // no conversion needed, just allocate a buffer of the correct length and copy into it
 399
 400     PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);
 401     if (!result)
 402       return nsnull;
 403
 404     nsAString::const_iterator fromBegin, fromEnd;
 405     PRUnichar* toBegin = result;
 406     *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = PRUnichar(0);
 407     return result;
 408   }
 409
 410 NS_COM
 411 PRUnichar*
 412 ToNewUnicode( const nsACString& aSource )
 413   {
 414     PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);
 415     if (!result)
 416       return nsnull;
 417
 418     nsACString::const_iterator fromBegin, fromEnd;
 419     LossyConvertEncoding<char, PRUnichar> converter(result);
 420     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
 421     return result;
 422   }
 423
 424 NS_COM
 425 PRUnichar*
 426 UTF8ToNewUnicode( const nsACString& aSource, PRUint32 *aUTF16Count )
 427   {
 428     nsACString::const_iterator start, end;
 429     CalculateUTF8Length calculator;
 430     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
 431                 calculator);
 432
 433     if (aUTF16Count)
 434       *aUTF16Count = calculator.Length();
 435
 436     PRUnichar *result = static_cast<PRUnichar*>
 437                                    (nsMemory::Alloc(sizeof(PRUnichar) * (calculator.Length() + 1)));
 438     if (!result)
 439       return nsnull;
 440
 441     ConvertUTF8toUTF16 converter(result);
 442     copy_string(aSource.BeginReading(start), aSource.EndReading(end),
 443                 converter).write_terminator();
 444     NS_ASSERTION(calculator.Length() == converter.Length(), "length mismatch");
 445
 446     return result;
 447   }
 448
 449 NS_COM
 450 PRUnichar*
 451 CopyUnicodeTo( const nsAString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, PRUint32 aLength )
 452   {
 453     nsAString::const_iterator fromBegin, fromEnd;
 454     PRUnichar* toBegin = aDest;
 455     copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ), toBegin);
 456     return aDest;
 457   }
 458
 459 NS_COM
 460 void
 461 CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
 462                const nsAString::const_iterator& aSrcEnd,
 463                nsAString& aDest )
 464   {
 465     nsAString::iterator writer;
 466     if (!SetLengthForWriting(aDest, Distance(aSrcStart, aSrcEnd)))
 467         return;
 468
 469     aDest.BeginWriting(writer);
 470     nsAString::const_iterator fromBegin(aSrcStart);
 471
 472     copy_string(fromBegin, aSrcEnd, writer);
 473   }
 474
 475 NS_COM
 476 void
 477 AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
 478                  const nsAString::const_iterator& aSrcEnd,
 479                  nsAString& aDest )
 480   {
 481     nsAString::iterator writer;
 482     PRUint32 oldLength = aDest.Length();
 483     if(!SetLengthForWriting(aDest, oldLength + Distance(aSrcStart, aSrcEnd)))
 484         return;
 485
 486     aDest.BeginWriting(writer).advance(oldLength);
 487     nsAString::const_iterator fromBegin(aSrcStart);
 488
 489     copy_string(fromBegin, aSrcEnd, writer);
 490   }
 491
 492 NS_COM
 493 PRBool
 494 IsASCII( const nsAString& aString )
 495   {
 496     static const PRUnichar NOT_ASCII = PRUnichar(~0x007F);
 497
 498
 499     // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
 500
 501     nsAString::const_iterator iter, done_reading;
 502     aString.BeginReading(iter);
 503     aString.EndReading(done_reading);
 504
 505     const PRUnichar* c = iter.get();
 506     const PRUnichar* end = done_reading.get();
 507
 508     while ( c < end )
 509       {
 510         if ( *c++ & NOT_ASCII )
 511           return PR_FALSE;
 512       }
 513
 514     return PR_TRUE;
 515   }
 516
 517 NS_COM
 518 PRBool
 519 IsASCII( const nsACString& aString )
 520   {
 521     static const char NOT_ASCII = char(~0x7F);
 522
 523
 524     // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
 525
 526     nsACString::const_iterator iter, done_reading;
 527     aString.BeginReading(iter);
 528     aString.EndReading(done_reading);
 529
 530     const char* c = iter.get();
 531     const char* end = done_reading.get();
 532
 533     while ( c < end )
 534       {
 535         if ( *c++ & NOT_ASCII )
 536           return PR_FALSE;
 537       }
 538
 539     return PR_TRUE;
 540   }
 541
 542 NS_COM
 543 PRBool
 544 IsUTF8( const nsACString& aString )
 545   {
 546     nsReadingIterator<char> done_reading;
 547     aString.EndReading(done_reading);
 548
 549     PRInt32 state = 0;
 550     PRBool overlong = PR_FALSE;
 551     PRBool surrogate = PR_FALSE;
 552     PRBool nonchar = PR_FALSE;
 553     PRUint16 olupper = 0; // overlong byte upper bound.
 554     PRUint16 slower = 0;  // surrogate byte lower bound.
 555
 556     nsReadingIterator<char> iter;
 557     aString.BeginReading(iter);
 558
 559     const char* ptr = iter.get();
 560     const char* end = done_reading.get();
 561     while ( ptr < end )
 562       {
 563         PRUint8 c;
 564
 565         if (0 == state)
 566           {
 567             c = *ptr++;
 568
 569             if ( UTF8traits::isASCII(c) )
 570               continue;
 571
 572             if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
 573               return PR_FALSE;
 574             else if ( UTF8traits::is2byte(c) )
 575                 state = 1;
 576             else if ( UTF8traits::is3byte(c) )
 577               {
 578                 state = 2;
 579                 if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF]
 580                   {
 581                     overlong = PR_TRUE;
 582                     olupper = 0x9F;
 583                   }
 584                 else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
 585                   {
 586                     surrogate = PR_TRUE;
 587                     slower = 0xA0;
 588                   }
 589                 else if ( c == 0xEF ) // EF BF [BE-BF] : non-character
 590                   nonchar = PR_TRUE;
 591               }
 592             else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
 593               {
 594                 state = 3;
 595                 nonchar = PR_TRUE;
 596                 if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
 597                   {
 598                     overlong = PR_TRUE;
 599                     olupper = 0x8F;
 600                   }
 601                 else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF]
 602                   {
 603                     // actually not surrogates but codepoints beyond 0x10FFFF
 604                     surrogate = PR_TRUE;
 605                     slower = 0x90;
 606                   }
 607               }
 608             else
 609               return PR_FALSE; // Not UTF-8 string
 610           }
 611
 612         while ( ptr < end && state )
 613           {
 614             c = *ptr++;
 615             --state;
 616
 617             // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
 618             if ( nonchar &&  ( !state &&  c < 0xBE ||
 619                   state == 1 && c != 0xBF  ||
 620                   state == 2 && 0x0F != (0x0F & c) ))
 621                 nonchar = PR_FALSE;
 622
 623             if ( !UTF8traits::isInSeq(c) || overlong && c <= olupper ||
 624                   surrogate && slower <= c || nonchar && !state )
 625               return PR_FALSE; // Not UTF-8 string
 626             overlong = surrogate = PR_FALSE;
 627           }
 628         }
 629     return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
 630   }
 631
 632   /**
 633    * A character sink for in-place case conversion.
 634    */
 635 class ConvertToUpperCase
 636   {
 637     public:
 638       typedef char value_type;
 639
 640       PRUint32
 641       write( const char* aSource, PRUint32 aSourceLength )
 642         {
 643           char* cp = const_cast<char*>(aSource);
 644           const char* end = aSource + aSourceLength;
 645           while (cp != end) {
 646             char ch = *cp;
 647             if ((ch >= 'a') && (ch <= 'z'))
 648               *cp = ch - ('a' - 'A');
 649             ++cp;
 650           }
 651           return aSourceLength;
 652         }
 653   };
 654
 655 NS_COM
 656 void
 657 ToUpperCase( nsCSubstring& aCString )
 658   {
 659     ConvertToUpperCase converter;
 660     char* start;
 661     converter.write(aCString.BeginWriting(start), aCString.Length());
 662   }
 663
 664   /**
 665    * A character sink for copying with case conversion.
 666    */
 667 class CopyToUpperCase
 668   {
 669     public:
 670       typedef char value_type;
 671
 672       CopyToUpperCase( nsACString::iterator& aDestIter )
 673         : mIter(aDestIter)
 674         {
 675         }
 676
 677       PRUint32
 678       write( const char* aSource, PRUint32 aSourceLength )
 679         {
 680           PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
 681           char* cp = mIter.get();
 682           const char* end = aSource + len;
 683           while (aSource != end) {
 684             char ch = *aSource;
 685             if ((ch >= 'a') && (ch <= 'z'))
 686               *cp = ch - ('a' - 'A');
 687             else
 688               *cp = ch;
 689             ++aSource;
 690             ++cp;
 691           }
 692           mIter.advance(len);
 693           return len;
 694         }
 695
 696     protected:
 697       nsACString::iterator& mIter;
 698   };
 699
 700 NS_COM
 701 void
 702 ToUpperCase( const nsACString& aSource, nsACString& aDest )
 703   {
 704     nsACString::const_iterator fromBegin, fromEnd;
 705     nsACString::iterator toBegin;
 706     if (!SetLengthForWritingC(aDest, aSource.Length()))
 707         return;
 708
 709     CopyToUpperCase converter(aDest.BeginWriting(toBegin));
 710     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
 711   }
 712
 713   /**
 714    * A character sink for case conversion.
 715    */
 716 class ConvertToLowerCase
 717   {
 718     public:
 719       typedef char value_type;
 720
 721       PRUint32
 722       write( const char* aSource, PRUint32 aSourceLength )
 723         {
 724           char* cp = const_cast<char*>(aSource);
 725           const char* end = aSource + aSourceLength;
 726           while (cp != end) {
 727             char ch = *cp;
 728             if ((ch >= 'A') && (ch <= 'Z'))
 729               *cp = ch + ('a' - 'A');
 730             ++cp;
 731           }
 732           return aSourceLength;
 733         }
 734   };
 735
 736 NS_COM
 737 void
 738 ToLowerCase( nsCSubstring& aCString )
 739   {
 740     ConvertToLowerCase converter;
 741     char* start;
 742     converter.write(aCString.BeginWriting(start), aCString.Length());
 743   }
 744
 745   /**
 746    * A character sink for copying with case conversion.
 747    */
 748 class CopyToLowerCase
 749   {
 750     public:
 751       typedef char value_type;
 752
 753       CopyToLowerCase( nsACString::iterator& aDestIter )
 754         : mIter(aDestIter)
 755         {
 756         }
 757
 758       PRUint32
 759       write( const char* aSource, PRUint32 aSourceLength )
 760         {
 761           PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
 762           char* cp = mIter.get();
 763           const char* end = aSource + len;
 764           while (aSource != end) {
 765             char ch = *aSource;
 766             if ((ch >= 'A') && (ch <= 'Z'))
 767               *cp = ch + ('a' - 'A');
 768             else
 769               *cp = ch;
 770             ++aSource;
 771             ++cp;
 772           }
 773           mIter.advance(len);
 774           return len;
 775         }
 776
 777     protected:
 778       nsACString::iterator& mIter;
 779   };
 780
 781 NS_COM
 782 void
 783 ToLowerCase( const nsACString& aSource, nsACString& aDest )
 784   {
 785     nsACString::const_iterator fromBegin, fromEnd;
 786     nsACString::iterator toBegin;
 787     if (!SetLengthForWritingC(aDest, aSource.Length()))
 788         return;
 789
 790     CopyToLowerCase converter(aDest.BeginWriting(toBegin));
 791     copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
 792   }
 793
 794 template <class StringT, class IteratorT, class Comparator>
 795 PRBool
 796 FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
 797   {
 798     PRBool found_it = PR_FALSE;
 799
 800       // only bother searching at all if we're given a non-empty range to search
 801     if ( aSearchStart != aSearchEnd )
 802       {
 803         IteratorT aPatternStart, aPatternEnd;
 804         aPattern.BeginReading(aPatternStart);
 805         aPattern.EndReading(aPatternEnd);
 806
 807           // outer loop keeps searching till we find it or run out of string to search
 808         while ( !found_it )
 809           {
 810               // fast inner loop (that's what it's called, not what it is) looks for a potential match
 811             while ( aSearchStart != aSearchEnd &&
 812                     compare(*aPatternStart, *aSearchStart) )
 813               ++aSearchStart;
 814
 815               // if we broke out of the `fast' loop because we're out of string ... we're done: no match
 816             if ( aSearchStart == aSearchEnd )
 817               break;
 818
 819               // otherwise, we're at a potential match, let's see if we really hit one
 820             IteratorT testPattern(aPatternStart);
 821             IteratorT testSearch(aSearchStart);
 822
 823               // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
 824             for(;;)
 825               {
 826                   // we already compared the first character in the outer loop,
 827                   //  so we'll advance before the next comparison
 828                 ++testPattern;
 829                 ++testSearch;
 830
 831                   // if we verified all the way to the end of the pattern, then we found it!
 832                 if ( testPattern == aPatternEnd )
 833                   {
 834                     found_it = PR_TRUE;
 835                     aSearchEnd = testSearch; // return the exact found range through the parameters
 836                     break;
 837                   }
 838
 839                   // if we got to end of the string we're searching before we hit the end of the
 840                   //  pattern, we'll never find what we're looking for
 841                 if ( testSearch == aSearchEnd )
 842                   {
 843                     aSearchStart = aSearchEnd;
 844                     break;
 845                   }
 846
 847                   // else if we mismatched ... it's time to advance to the next search position
 848                   //  and get back into the `fast' loop
 849                 if ( compare(*testPattern, *testSearch) )
 850                   {
 851                     ++aSearchStart;
 852                     break;
 853                   }
 854               }
 855           }
 856       }
 857
 858     return found_it;
 859   }
 860
 861   /**
 862    * This searches the entire string from right to left, and returns the first match found, if any.
 863    */
 864 template <class StringT, class IteratorT, class Comparator>
 865 PRBool
 866 RFindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
 867   {
 868     IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
 869     aPattern.BeginReading(patternStart);
 870     aPattern.EndReading(patternEnd);
 871
 872       // Point to the last character in the pattern
 873     --patternEnd;
 874       // outer loop keeps searching till we run out of string to search
 875     while ( aSearchStart != searchEnd )
 876       {
 877           // Point to the end position of the next possible match
 878         --searchEnd;
 879
 880           // Check last character, if a match, explore further from here
 881         if ( compare(*patternEnd, *searchEnd) == 0 )
 882           {
 883               // We're at a potential match, let's see if we really hit one
 884             IteratorT testPattern(patternEnd);
 885             IteratorT testSearch(searchEnd);
 886
 887               // inner loop verifies the potential match at the current position
 888             do
 889               {
 890                   // if we verified all the way to the end of the pattern, then we found it!
 891                 if ( testPattern == patternStart )
 892                   {
 893                     aSearchStart = testSearch;  // point to start of match
 894                     aSearchEnd = ++searchEnd;   // point to end of match
 895                     return PR_TRUE;
 896                   }
 897
 898                   // if we got to end of the string we're searching before we hit the end of the
 899                   //  pattern, we'll never find what we're looking for
 900                 if ( testSearch == aSearchStart )
 901                   {
 902                     aSearchStart = aSearchEnd;
 903                     return PR_FALSE;
 904                   }
 905
 906                   // test previous character for a match
 907                 --testPattern;
 908                 --testSearch;
 909               }
 910             while ( compare(*testPattern, *testSearch) == 0 );
 911           }
 912       }
 913
 914     aSearchStart = aSearchEnd;
 915     return PR_FALSE;
 916   }
 917
 918 NS_COM
 919 PRBool
 920 FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )
 921   {
 922     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 923   }
 924
 925 NS_COM
 926 PRBool
 927 FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
 928   {
 929     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 930   }
 931
 932 NS_COM
 933 PRBool
 934 CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )
 935   {
 936     return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator());
 937   }
 938
 939 NS_COM
 940 PRBool
 941 RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)
 942   {
 943     return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 944   }
 945
 946 NS_COM
 947 PRBool
 948 RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
 949   {
 950     return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
 951   }
 952
 953 NS_COM
 954 PRBool
 955 FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )
 956   {
 957     PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
 958
 959     const PRUnichar* charFoundAt = nsCharTraits<PRUnichar>::find(aSearchStart.get(), fragmentLength, aChar);
 960     if ( charFoundAt ) {
 961       aSearchStart.advance( charFoundAt - aSearchStart.get() );
 962       return PR_TRUE;
 963     }
 964
 965     aSearchStart.advance(fragmentLength);
 966     return PR_FALSE;
 967   }
 968
 969 NS_COM
 970 PRBool
 971 FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd )
 972   {
 973     PRInt32 fragmentLength = aSearchEnd.get() - aSearchStart.get();
 974
 975     const char* charFoundAt = nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
 976     if ( charFoundAt ) {
 977       aSearchStart.advance( charFoundAt - aSearchStart.get() );
 978       return PR_TRUE;
 979     }
 980
 981     aSearchStart.advance(fragmentLength);
 982     return PR_FALSE;
 983   }
 984
 985 NS_COM
 986 PRUint32
 987 CountCharInReadable( const nsAString& aStr,
 988                      PRUnichar aChar )
 989 {
 990   PRUint32 count = 0;
 991   nsAString::const_iterator begin, end;
 992
 993   aStr.BeginReading(begin);
 994   aStr.EndReading(end);
 995
 996   while (begin != end) {
 997     if (*begin == aChar) {
 998       ++count;
 999     }
1000     ++begin;
1001   }
1002
1003   return count;
1004 }
1005
1006 NS_COM
1007 PRUint32
1008 CountCharInReadable( const nsACString& aStr,
1009                      char aChar )
1010 {
1011   PRUint32 count = 0;
1012   nsACString::const_iterator begin, end;
1013
1014   aStr.BeginReading(begin);
1015   aStr.EndReading(end);
1016
1017   while (begin != end) {
1018     if (*begin == aChar) {
1019       ++count;
1020     }
1021     ++begin;
1022   }
1023
1024   return count;
1025 }
1026
1027 NS_COM PRBool
1028 StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring,
1029                   const nsStringComparator& aComparator )
1030   {
1031     nsAString::size_type src_len = aSource.Length(),
1032                          sub_len = aSubstring.Length();
1033     if (sub_len > src_len)
1034       return PR_FALSE;
1035     return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1036   }
1037
1038 NS_COM PRBool
1039 StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring,
1040                   const nsCStringComparator& aComparator )
1041   {
1042     nsACString::size_type src_len = aSource.Length(),
1043                           sub_len = aSubstring.Length();
1044     if (sub_len > src_len)
1045       return PR_FALSE;
1046     return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1047   }
1048
1049 NS_COM PRBool
1050 StringEndsWith( const nsAString& aSource, const nsAString& aSubstring,
1051                 const nsStringComparator& aComparator )
1052   {
1053     nsAString::size_type src_len = aSource.Length(),
1054                          sub_len = aSubstring.Length();
1055     if (sub_len > src_len)
1056       return PR_FALSE;
1057     return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1058                                                                  aComparator);
1059   }
1060
1061 NS_COM PRBool
1062 StringEndsWith( const nsACString& aSource, const nsACString& aSubstring,
1063                 const nsCStringComparator& aComparator )
1064   {
1065     nsACString::size_type src_len = aSource.Length(),
1066                           sub_len = aSubstring.Length();
1067     if (sub_len > src_len)
1068       return PR_FALSE;
1069     return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1070                                                                  aComparator);
1071   }
1072
1073
1074
1075 static const PRUnichar empty_buffer[1] = { '\0' };
1076
1077 NS_COM
1078 const nsAFlatString&
1079 EmptyString()
1080   {
1081     static const nsDependentString sEmpty(empty_buffer);
1082
1083     return sEmpty;
1084   }
1085
1086 NS_COM
1087 const nsAFlatCString&
1088 EmptyCString()
1089   {
1090     static const nsDependentCString sEmpty((const char *)empty_buffer);
1091
1092     return sEmpty;
1093   }
1094
1095 NS_COM PRInt32
1096 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
1097                    const nsASingleFragmentString& aUTF16String)
1098   {
1099     static const PRUint32 NOT_ASCII = PRUint32(~0x7F);
1100
1101     const char *u8, *u8end;
1102     aUTF8String.BeginReading(u8);
1103     aUTF8String.EndReading(u8end);
1104
1105     const PRUnichar *u16, *u16end;
1106     aUTF16String.BeginReading(u16);
1107     aUTF16String.EndReading(u16end);
1108
1109     while (u8 != u8end && u16 != u16end)
1110       {
1111         // Cast away the signedness of *u8 to prevent signextension when
1112         // converting to PRUint32
1113         PRUint32 c8_32 = (PRUint8)*u8;
1114
1115         if (c8_32 & NOT_ASCII)
1116           {
1117             PRBool err;
1118             c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
1119             if (err)
1120               return PR_INT32_MIN;
1121
1122             PRUint32 c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
1123             // The above UTF16CharEnumerator::NextChar() calls can
1124             // fail, but if it does for anything other than no data to
1125             // look at (which can't happen here), it returns the
1126             // Unicode replacement character 0xFFFD for the invalid
1127             // data they were fed. Ignore that error and treat invalid
1128             // UTF16 as 0xFFFD.
1129             //
1130             // This matches what our UTF16 to UTF8 conversion code
1131             // does, and thus a UTF8 string that came from an invalid
1132             // UTF16 string will compare equal to the invalid UTF16
1133             // string it came from. Same is true for any other UTF16
1134             // string differs only in the invalid part of the string.
1135
1136             if (c8_32 != c16_32)
1137               return c8_32 < c16_32 ? -1 : 1;
1138           }
1139         else
1140           {
1141             if (c8_32 != *u16)
1142               return c8_32 > *u16 ? 1 : -1;
1143
1144             ++u8;
1145             ++u16;
1146           }
1147       }
1148
1149     if (u8 != u8end)
1150       {
1151         // We get to the end of the UTF16 string, but no to the end of
1152         // the UTF8 string. The UTF8 string is longer than the UTF16
1153         // string
1154
1155         return 1;
1156       }
1157
1158     if (u16 != u16end)
1159       {
1160         // We get to the end of the UTF8 string, but no to the end of
1161         // the UTF16 string. The UTF16 string is longer than the UTF8
1162         // string
1163
1164         return -1;
1165       }
1166
1167     // The two strings match.
1168
1169     return 0;
1170   }
1171
1172 NS_COM
1173 void
1174 AppendUCS4ToUTF16(const PRUint32 aSource, nsAString& aDest)
1175   {
1176     NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
1177     if (IS_IN_BMP(aSource))
1178       {
1179         aDest.Append(PRUnichar(aSource));
1180       }
1181     else
1182       {
1183         aDest.Append(H_SURROGATE(aSource));
1184         aDest.Append(L_SURROGATE(aSource));
1185       }
1186   }