xpcom/ds/nsCRT.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is mozilla.org code.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Netscape Communications Corporation.
  19  * Portions created by the Initial Developer are Copyright (C) 1998
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *
  24  * Alternatively, the contents of this file may be used under the terms of
  25  * either of the GNU General Public License Version 2 or later (the "GPL"),
  26  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  27  * in which case the provisions of the GPL or the LGPL are applicable instead
  28  * of those above. If you wish to allow use of your version of this file only
  29  * under the terms of either the GPL or the LGPL, and not to allow others to
  30  * use your version of this file under the terms of the MPL, indicate your
  31  * decision by deleting the provisions above and replace them with the notice
  32  * and other provisions required by the GPL or the LGPL. If you do not delete
  33  * the provisions above, a recipient may use your version of this file under
  34  * the terms of any one of the MPL, the GPL or the LGPL.
  35  *
  36  * ***** END LICENSE BLOCK ***** */
  37
  38
/**
 * MODULE NOTES:
 * @update  gess 7/30/98
 *
 * Much as I hate to do it, we were using string compares wrong.
 * Programmers often call functions like strcmp(s1,s2) and pass
 * one or more null pointers. Rather than blow up on these, I've
 * added quick checks to ensure that cases like this don't cause
 * us to fail.
 *
 * In general, if you pass a null into any of these string compare
 * routines, we simply return 0. The one exception is strcmp() with
 * exactly one null argument: it returns -1 when only s2 is null and
 * 1 when only s1 is null.
 */
  52
  53
  54 #include "nsCRT.h"
  55 #include "nsIServiceManager.h"
  56 #include "nsCharTraits.h"
  57 #include "prbit.h"
  58
  59 #define ADD_TO_HASHVAL(hashval, c) \
  60     hashval = PR_ROTATE_LEFT32(hashval, 4) ^ (c);
  61
  62 //----------------------------------------------------------------------
  63
  64
  65 ////////////////////////////////////////////////////////////////////////////////
  66 // My lovely strtok routine
  67
  68 #define IS_DELIM(m, c)          ((m)[(c) >> 3] & (1 << ((c) & 7)))
  69 #define SET_DELIM(m, c)         ((m)[(c) >> 3] |= (1 << ((c) & 7)))
  70 #define DELIM_TABLE_SIZE        32
  71
  72 char* nsCRT::strtok(char* string, const char* delims, char* *newStr)
  73 {
  74   NS_ASSERTION(string, "Unlike regular strtok, the first argument cannot be null.");
  75
  76   char delimTable[DELIM_TABLE_SIZE];
  77   PRUint32 i;
  78   char* result;
  79   char* str = string;
  80
  81   for (i = 0; i < DELIM_TABLE_SIZE; i++)
  82     delimTable[i] = '\0';
  83
  84   for (i = 0; delims[i]; i++) {
  85     SET_DELIM(delimTable, static_cast<PRUint8>(delims[i]));
  86   }
  87   NS_ASSERTION(delims[i] == '\0', "too many delimiters");
  88
  89   // skip to beginning
  90   while (*str && IS_DELIM(delimTable, static_cast<PRUint8>(*str))) {
  91     str++;
  92   }
  93   result = str;
  94
  95   // fix up the end of the token
  96   while (*str) {
  97     if (IS_DELIM(delimTable, static_cast<PRUint8>(*str))) {
  98       *str++ = '\0';
  99       break;
 100     }
 101     str++;
 102   }
 103   *newStr = str;
 104
 105   return str == result ? NULL : result;
 106 }
 107
 108 ////////////////////////////////////////////////////////////////////////////////
 109
 110 /**
 111  * Compare unichar string ptrs, stopping at the 1st null
 112  * NOTE: If both are null, we return 0.
 113  * NOTE: We terminate the search upon encountering a NULL
 114  *
 115  * @update  gess 11/10/99
 116  * @param   s1 and s2 both point to unichar strings
 117  * @return  0 if they match, -1 if s1<s2; 1 if s1>s2
 118  */
 119 PRInt32 nsCRT::strcmp(const PRUnichar* s1, const PRUnichar* s2) {
 120   if(s1 && s2) {
 121     for (;;) {
 122       PRUnichar c1 = *s1++;
 123       PRUnichar c2 = *s2++;
 124       if (c1 != c2) {
 125         if (c1 < c2) return -1;
 126         return 1;
 127       }
 128       if ((0==c1) || (0==c2)) break;
 129     }
 130   }
 131   else {
 132     if (s1)                     // s2 must have been null
 133       return -1;
 134     if (s2)                     // s1 must have been null
 135       return 1;
 136   }
 137   return 0;
 138 }
 139
 140 /**
 141  * Compare unichar string ptrs, stopping at the 1st null or nth char.
 142  * NOTE: If either is null, we return 0.
 143  * NOTE: We DO NOT terminate the search upon encountering NULL's before N
 144  *
 145  * @update  gess 11/10/99
 146  * @param   s1 and s2 both point to unichar strings
 147  * @return  0 if they match, -1 if s1<s2; 1 if s1>s2
 148  */
 149 PRInt32 nsCRT::strncmp(const PRUnichar* s1, const PRUnichar* s2, PRUint32 n) {
 150   if(s1 && s2) {
 151     if(n != 0) {
 152       do {
 153         PRUnichar c1 = *s1++;
 154         PRUnichar c2 = *s2++;
 155         if (c1 != c2) {
 156           if (c1 < c2) return -1;
 157           return 1;
 158         }
 159       } while (--n != 0);
 160     }
 161   }
 162   return 0;
 163 }
 164
 165 PRUnichar* nsCRT::strdup(const PRUnichar* str)
 166 {
 167   PRUint32 len = nsCRT::strlen(str);
 168   return strndup(str, len);
 169 }
 170
 171 PRUnichar* nsCRT::strndup(const PRUnichar* str, PRUint32 len)
 172 {
 173         nsCppSharedAllocator<PRUnichar> shared_allocator;
 174         PRUnichar* rslt = shared_allocator.allocate(len + 1); // add one for the null
 175   // PRUnichar* rslt = new PRUnichar[len + 1];
 176
 177   if (rslt == NULL) return NULL;
 178   memcpy(rslt, str, len * sizeof(PRUnichar));
 179   rslt[len] = 0;
 180   return rslt;
 181 }
 182
 183   /**
 184    * |nsCRT::HashCode| is identical to |PL_HashString|, which tests
 185    *  (http://bugzilla.mozilla.org/showattachment.cgi?attach_id=26596)
 186    *  show to be the best hash among several other choices.
 187    *
 188    * We re-implement it here rather than calling it for two reasons:
 189    *  (1) in this interface, we also calculate the length of the
 190    *  string being hashed; and (2) the narrow and wide and `buffer' versions here
 191    *  will hash equivalent strings to the same value, e.g., "Hello" and L"Hello".
 192    */
 193 PRUint32 nsCRT::HashCode(const char* str, PRUint32* resultingStrLen)
 194 {
 195   PRUint32 h = 0;
 196   const char* s = str;
 197
 198   if (!str) return h;
 199
 200   unsigned char c;
 201   while ( (c = *s++) )
 202     ADD_TO_HASHVAL(h, c);
 203
 204   if ( resultingStrLen )
 205     *resultingStrLen = (s-str)-1;
 206   return h;
 207 }
 208
 209 PRUint32 nsCRT::HashCode(const char* start, PRUint32 length)
 210 {
 211   PRUint32 h = 0;
 212   const char* s = start;
 213   const char* end = start + length;
 214
 215   unsigned char c;
 216   while ( s < end ) {
 217     c = *s++;
 218     ADD_TO_HASHVAL(h, c);
 219   }
 220
 221   return h;
 222 }
 223
 224 PRUint32 nsCRT::HashCode(const PRUnichar* str, PRUint32* resultingStrLen)
 225 {
 226   PRUint32 h = 0;
 227   const PRUnichar* s = str;
 228
 229   if (!str) return h;
 230
 231   PRUnichar c;
 232   while ( (c = *s++) )
 233     ADD_TO_HASHVAL(h, c);
 234
 235   if ( resultingStrLen )
 236     *resultingStrLen = (s-str)-1;
 237   return h;
 238 }
 239
 240 PRUint32 nsCRT::HashCodeAsUTF8(const PRUnichar* start, PRUint32 length)
 241 {
 242   PRUint32 h = 0;
 243   const PRUnichar* s = start;
 244   const PRUnichar* end = start + length;
 245
 246   PRUint16 W1 = 0;      // the first UTF-16 word in a two word tuple
 247   PRUint32 U = 0;       // the current char as UCS-4
 248   int code_length = 0;  // the number of bytes in the UTF-8 sequence for the current char
 249
 250   PRUint16 W;
 251   while ( s < end )
 252     {
 253       W = *s++;
 254         /*
 255          * On the fly, decoding from UTF-16 (and/or UCS-2) into UTF-8 as per
 256          *  http://www.ietf.org/rfc/rfc2781.txt
 257          *  http://www.ietf.org/rfc/rfc3629.txt
 258          */
 259
 260       if ( !W1 )
 261         {
 262           if ( !IS_SURROGATE(W) )
 263             {
 264               U = W;
 265               if ( W <= 0x007F )
 266                 code_length = 1;
 267               else if ( W <= 0x07FF )
 268                 code_length = 2;
 269               else
 270                 code_length = 3;
 271             }
 272           else if ( NS_IS_HIGH_SURROGATE(W) && s < end)
 273             {
 274               W1 = W;
 275
 276               continue;
 277             }
 278           else
 279             {
 280               // Treat broken characters as the Unicode replacement
 281               // character 0xFFFD
 282               U = 0xFFFD;
 283
 284               code_length = 3;
 285
 286               NS_WARNING("Got low surrogate but no previous high surrogate");
 287             }
 288         }
 289       else
 290         {
 291           // as required by the standard, this code is careful to
 292           // throw out illegal sequences
 293
 294           if ( NS_IS_LOW_SURROGATE(W) )
 295             {
 296               U = SURROGATE_TO_UCS4(W1, W);
 297               NS_ASSERTION(IS_VALID_CHAR(U), "How did this happen?");
 298               code_length = 4;
 299             }
 300           else
 301             {
 302               // Treat broken characters as the Unicode replacement
 303               // character 0xFFFD
 304               U = 0xFFFD;
 305
 306               code_length = 3;
 307
 308               NS_WARNING("High surrogate not followed by low surrogate");
 309
 310               // The pointer to the next character points to the second 16-bit
 311               // value, not beyond it, as per Unicode 5.0.0 Chapter 3 C10, only
 312               // the first code unit of an illegal sequence must be treated as
 313               // an illegally terminated code unit sequence (also Chapter 3
 314               // D91, "isolated [not paired and ill-formed] UTF-16 code units
 315               // in the range D800..DFFF are ill-formed").
 316               --s;
 317             }
 318
 319           W1 = 0;
 320         }
 321
 322
 323       static const PRUint16 sBytePrefix[5]  = { 0x0000, 0x0000, 0x00C0, 0x00E0, 0x00F0  };
 324       static const PRUint16 sShift[5]       = { 0, 0, 6, 12, 18 };
 325
 326       /*
 327        *  Unlike the algorithm in
 328        *  http://www.ietf.org/rfc/rfc3629.txt we must calculate the
 329        *  bytes in left to right order so that our hash result
 330        *  matches what the narrow version would calculate on an
 331        *  already UTF-8 string.
 332        */
 333
 334       // hash the first (and often, only, byte)
 335       ADD_TO_HASHVAL(h, (sBytePrefix[code_length] | (U>>sShift[code_length])));
 336
 337       // an unrolled loop for hashing any remaining bytes in this
 338       // sequence
 339       switch ( code_length )
 340         {  // falling through in each case
 341           case 4:   ADD_TO_HASHVAL(h, (0x80 | ((U>>12) & 0x003F)));
 342           case 3:   ADD_TO_HASHVAL(h, (0x80 | ((U>>6 ) & 0x003F)));
 343           case 2:   ADD_TO_HASHVAL(h, (0x80 | ( U      & 0x003F)));
 344           default:  code_length = 0;
 345             break;
 346         }
 347     }
 348
 349   return h;
 350 }
 351
 352 PRUint32 nsCRT::BufferHashCode(const PRUnichar* s, PRUint32 len)
 353 {
 354   PRUint32 h = 0;
 355   const PRUnichar* done = s + len;
 356
 357   while ( s < done )
 358     h = PR_ROTATE_LEFT32(h, 4) ^ PRUint16(*s++); // cast to unsigned to prevent possible sign extension
 359   return h;
 360 }
 361
 362 // This should use NSPR but NSPR isn't exporting its PR_strtoll function
 363 // Until then...
 364 PRInt64 nsCRT::atoll(const char *str)
 365 {
 366     if (!str)
 367         return LL_Zero();
 368
 369     PRInt64 ll = LL_Zero(), digitll = LL_Zero();
 370
 371     while (*str && *str >= '0' && *str <= '9') {
 372         LL_MUL(ll, ll, 10);
 373         LL_UI2L(digitll, (*str - '0'));
 374         LL_ADD(ll, ll, digitll);
 375         str++;
 376     }
 377
 378     return ll;
 379 }
 380