khtml/misc/StringHash.h

   1 /*
   2  * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Library General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public License
  15  * along with this library; see the file COPYING.LIB.  If not, write to
  16  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  17  * Boston, MA 02110-1301, USA.
  18  *
  19  */
  20
  21 #ifndef StringHash_h
  22 #define StringHash_h
  23
  24 #include "AtomicStringImpl.h"
  25 #include "dom/dom_string.h"
  26 #include "xml/dom_stringimpl.h"
  27 #include <wtf/HashTraits.h>
  28
  29 using DOM::DOMString;
  30 using DOM::DOMStringImpl;
  31
  32 namespace khtml {
  33
  34     // FIXME: We should really figure out a way to put the computeHash function that's
  35     // currently a member function of DOMStringImpl into this file so we can be a little
  36     // closer to having all the nearly-identical hash functions in one place.
  37
  38     struct StringHash {
  39         static unsigned hash(DOMStringImpl* key) { return key->hash(); }
  40         static bool equal(DOMStringImpl* a, DOMStringImpl* b)
  41         {
  42             if (a == b)
  43                 return true;
  44             if (!a || !b)
  45                 return false;
  46
  47             unsigned aLength = a->length();
  48             unsigned bLength = b->length();
  49             if (aLength != bLength)
  50                 return false;
  51
  52             const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->unicode());
  53             const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->unicode());
  54
  55             unsigned halfLength = aLength >> 1;
  56             for (unsigned i = 0; i != halfLength; ++i)
  57                 if (*aChars++ != *bChars++)
  58                     return false;
  59
  60             if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars))
  61                 return false;
  62
  63             return true;
  64         }
  65
  66         static unsigned hash(const RefPtr<DOMStringImpl>& key) { return key->hash(); }
  67         static bool equal(const RefPtr<DOMStringImpl>& a, const RefPtr<DOMStringImpl>& b)
  68         {
  69             return equal(a.get(), b.get());
  70         }
  71
  72         static unsigned hash(const DOMString& key) { return key.implementation()->hash(); }
  73         static bool equal(const DOMString& a, const DOMString& b)
  74         {
  75             return equal(a.implementation(), b.implementation());
  76         }
  77
  78         static const bool safeToCompareToEmptyOrDeleted = false;
  79     };
  80
  81     class CaseFoldingHash {
  82     private:
  83         // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
  84         static const unsigned PHI = 0x9e3779b9U;
  85     public:
  86         // Paul Hsieh's SuperFastHash
  87         // http://www.azillionmonkeys.com/qed/hash.html
  88         static unsigned hash(const QChar* data, unsigned length)
  89         {
  90             unsigned l = length;
  91             const QChar* s = data;
  92             uint32_t hash = PHI;
  93             uint32_t tmp;
  94
  95             int rem = l & 1;
  96             l >>= 1;
  97
  98             // Main loop.
  99             for (; l > 0; l--) {
 100                 hash += s[0].toCaseFolded().unicode();
 101                 tmp = (s[1].toCaseFolded().unicode() << 11) ^ hash;
 102                 hash = (hash << 16) ^ tmp;
 103                 s += 2;
 104                 hash += hash >> 11;
 105             }
 106
 107             // Handle end case.
 108             if (rem) {
 109                 hash += s[0].toCaseFolded().unicode();
 110                 hash ^= hash << 11;
 111                 hash += hash >> 17;
 112             }
 113
 114             // Force "avalanching" of final 127 bits.
 115             hash ^= hash << 3;
 116             hash += hash >> 5;
 117             hash ^= hash << 2;
 118             hash += hash >> 15;
 119             hash ^= hash << 10;
 120
 121             // This avoids ever returning a hash code of 0, since that is used to
 122             // signal "hash not computed yet", using a value that is likely to be
 123             // effectively the same as 0 when the low bits are masked.
 124             hash |= !hash << 31;
 125
 126             return hash;
 127         }
 128
 129         static unsigned hash(DOMStringImpl* str)
 130         {
 131             return hash(str->unicode(), str->length());
 132         }
 133
 134         static unsigned hash(const char* str, unsigned length)
 135         {
 136             // This hash is designed to work on 16-bit chunks at a time. But since the normal case
 137             // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
 138             // were 16-bit chunks, which will give matching results.
 139
 140             unsigned l = length;
 141             const char* s = str;
 142             uint32_t hash = PHI;
 143             uint32_t tmp;
 144
 145             int rem = l & 1;
 146             l >>= 1;
 147
 148             // Main loop
 149             for (; l > 0; l--) {
 150                 hash += QChar::toCaseFolded((unsigned int)s[0]);
 151                 tmp = (QChar::toCaseFolded((unsigned int)s[1]) << 11) ^ hash;
 152                 hash = (hash << 16) ^ tmp;
 153                 s += 2;
 154                 hash += hash >> 11;
 155             }
 156
 157             // Handle end case
 158             if (rem) {
 159                 hash += QChar::toCaseFolded((unsigned int)s[0]);
 160                 hash ^= hash << 11;
 161                 hash += hash >> 17;
 162             }
 163
 164             // Force "avalanching" of final 127 bits
 165             hash ^= hash << 3;
 166             hash += hash >> 5;
 167             hash ^= hash << 2;
 168             hash += hash >> 15;
 169             hash ^= hash << 10;
 170
 171             // this avoids ever returning a hash code of 0, since that is used to
 172             // signal "hash not computed yet", using a value that is likely to be
 173             // effectively the same as 0 when the low bits are masked
 174             hash |= !hash << 31;
 175
 176             return hash;
 177         }
 178
 179         static bool equal(DOMStringImpl* a, DOMStringImpl* b)
 180         {
 181             if (a == b)
 182                 return true;
 183             if (!a || !b)
 184                 return false;
 185             unsigned length = a->length();
 186             if (length != b->length())
 187                 return false;
 188             for (unsigned i = 0; i < length; ++i)
 189                 if (a->unicode()[i].toCaseFolded() != b->unicode()[i].toCaseFolded()) return false;
 190             return true;
 191         }
 192
 193         static unsigned hash(const RefPtr<DOMStringImpl>& key)
 194         {
 195             return hash(key.get());
 196         }
 197
 198         static bool equal(const RefPtr<DOMStringImpl>& a, const RefPtr<DOMStringImpl>& b)
 199         {
 200             return equal(a.get(), b.get());
 201         }
 202
 203         static unsigned hash(const DOMString& key)
 204         {
 205             return hash(key.implementation());
 206         }
 207         static bool equal(const DOMString& a, const DOMString& b)
 208         {
 209             return equal(a.implementation(), b.implementation());
 210         }
 211
 212         static const bool safeToCompareToEmptyOrDeleted = false;
 213     };
 214
 215     // This hash can be used in cases where the key is a hash of a string, but we don't
 216     // want to store the string. It's not really specific to string hashing, but all our
 217     // current uses of it are for strings.
 218     struct AlreadyHashed : IntHash<unsigned> {
 219         static unsigned hash(unsigned key) { return key; }
 220
 221         // To use a hash value as a key for a hash table, we need to eliminate the
 222         // "deleted" value, which is negative one. That could be done by changing
 223         // the string hash function to never generate negative one, but this works
 224         // and is still relatively efficient.
 225         static unsigned avoidDeletedValue(unsigned hash)
 226         {
 227             ASSERT(hash);
 228             unsigned newHash = hash | (!(hash + 1) << 31);
 229             ASSERT(newHash);
 230             ASSERT(newHash != 0xFFFFFFFF);
 231             return newHash;
 232         }
 233     };
 234
 235 }
 236
 237 namespace WTF {
 238
 239     /*template<> struct HashTraits<DOM::DOMString> : GenericHashTraits<DOM::DOMString> {
 240         static const bool emptyValueIsZero = true;
 241         static void constructDeletedValue(DOM::DOMString& slot) { new (&slot) DOM::DOMString(HashTableDeletedValue); }
 242         static bool isDeletedValue(const DOM::DOMString& slot) { return slot.isHashTableDeletedValue(); }
 243     };*/
 244
 245 }
 246
 247 #endif
 248