fix tricky regression noticed by Vyacheslav Tokarev on Google Reader.
[kdelibs.git] / khtml / misc / StringHash.h
blob16b07ba169567ee82345ea63d83c342962729a0b
1 /*
2 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public License
15 * along with this library; see the file COPYING.LIB. If not, write to
16 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
21 #ifndef StringHash_h
22 #define StringHash_h
24 #include "AtomicStringImpl.h"
25 #include "dom/dom_string.h"
26 #include "xml/dom_stringimpl.h"
27 #include <wtf/HashTraits.h>
29 using DOM::DOMString;
30 using DOM::DOMStringImpl;
32 namespace khtml {
34 // FIXME: We should really figure out a way to put the computeHash function that's
35 // currently a member function of DOMStringImpl into this file so we can be a little
36 // closer to having all the nearly-identical hash functions in one place.
38 struct StringHash {
39 static unsigned hash(DOMStringImpl* key) { return key->hash(); }
40 static bool equal(DOMStringImpl* a, DOMStringImpl* b)
42 if (a == b)
43 return true;
44 if (!a || !b)
45 return false;
47 unsigned aLength = a->length();
48 unsigned bLength = b->length();
49 if (aLength != bLength)
50 return false;
52 const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->unicode());
53 const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->unicode());
55 unsigned halfLength = aLength >> 1;
56 for (unsigned i = 0; i != halfLength; ++i)
57 if (*aChars++ != *bChars++)
58 return false;
60 if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars))
61 return false;
63 return true;
66 static unsigned hash(const RefPtr<DOMStringImpl>& key) { return key->hash(); }
67 static bool equal(const RefPtr<DOMStringImpl>& a, const RefPtr<DOMStringImpl>& b)
69 return equal(a.get(), b.get());
72 static unsigned hash(const DOMString& key) { return key.implementation()->hash(); }
73 static bool equal(const DOMString& a, const DOMString& b)
75 return equal(a.implementation(), b.implementation());
78 static const bool safeToCompareToEmptyOrDeleted = false;
81 class CaseFoldingHash {
82 private:
83 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
84 static const unsigned PHI = 0x9e3779b9U;
85 public:
86 // Paul Hsieh's SuperFastHash
87 // http://www.azillionmonkeys.com/qed/hash.html
88 static unsigned hash(const QChar* data, unsigned length)
90 unsigned l = length;
91 const QChar* s = data;
92 uint32_t hash = PHI;
93 uint32_t tmp;
95 int rem = l & 1;
96 l >>= 1;
98 // Main loop.
99 for (; l > 0; l--) {
100 hash += s[0].toCaseFolded().unicode();
101 tmp = (s[1].toCaseFolded().unicode() << 11) ^ hash;
102 hash = (hash << 16) ^ tmp;
103 s += 2;
104 hash += hash >> 11;
107 // Handle end case.
108 if (rem) {
109 hash += s[0].toCaseFolded().unicode();
110 hash ^= hash << 11;
111 hash += hash >> 17;
114 // Force "avalanching" of final 127 bits.
115 hash ^= hash << 3;
116 hash += hash >> 5;
117 hash ^= hash << 2;
118 hash += hash >> 15;
119 hash ^= hash << 10;
121 // This avoids ever returning a hash code of 0, since that is used to
122 // signal "hash not computed yet", using a value that is likely to be
123 // effectively the same as 0 when the low bits are masked.
124 hash |= !hash << 31;
126 return hash;
129 static unsigned hash(DOMStringImpl* str)
131 return hash(str->unicode(), str->length());
134 static unsigned hash(const char* str, unsigned length)
136 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
137 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
138 // were 16-bit chunks, which will give matching results.
140 unsigned l = length;
141 const char* s = str;
142 uint32_t hash = PHI;
143 uint32_t tmp;
145 int rem = l & 1;
146 l >>= 1;
148 // Main loop
149 for (; l > 0; l--) {
150 hash += QChar::toCaseFolded((unsigned int)s[0]);
151 tmp = (QChar::toCaseFolded((unsigned int)s[1]) << 11) ^ hash;
152 hash = (hash << 16) ^ tmp;
153 s += 2;
154 hash += hash >> 11;
157 // Handle end case
158 if (rem) {
159 hash += QChar::toCaseFolded((unsigned int)s[0]);
160 hash ^= hash << 11;
161 hash += hash >> 17;
164 // Force "avalanching" of final 127 bits
165 hash ^= hash << 3;
166 hash += hash >> 5;
167 hash ^= hash << 2;
168 hash += hash >> 15;
169 hash ^= hash << 10;
171 // this avoids ever returning a hash code of 0, since that is used to
172 // signal "hash not computed yet", using a value that is likely to be
173 // effectively the same as 0 when the low bits are masked
174 hash |= !hash << 31;
176 return hash;
179 static bool equal(DOMStringImpl* a, DOMStringImpl* b)
181 if (a == b)
182 return true;
183 if (!a || !b)
184 return false;
185 unsigned length = a->length();
186 if (length != b->length())
187 return false;
188 for (unsigned i = 0; i < length; ++i)
189 if (a->unicode()[i].toCaseFolded() != b->unicode()[i].toCaseFolded()) return false;
190 return true;
193 static unsigned hash(const RefPtr<DOMStringImpl>& key)
195 return hash(key.get());
198 static bool equal(const RefPtr<DOMStringImpl>& a, const RefPtr<DOMStringImpl>& b)
200 return equal(a.get(), b.get());
203 static unsigned hash(const DOMString& key)
205 return hash(key.implementation());
207 static bool equal(const DOMString& a, const DOMString& b)
209 return equal(a.implementation(), b.implementation());
212 static const bool safeToCompareToEmptyOrDeleted = false;
215 // This hash can be used in cases where the key is a hash of a string, but we don't
216 // want to store the string. It's not really specific to string hashing, but all our
217 // current uses of it are for strings.
218 struct AlreadyHashed : IntHash<unsigned> {
219 static unsigned hash(unsigned key) { return key; }
221 // To use a hash value as a key for a hash table, we need to eliminate the
222 // "deleted" value, which is negative one. That could be done by changing
223 // the string hash function to never generate negative one, but this works
224 // and is still relatively efficient.
225 static unsigned avoidDeletedValue(unsigned hash)
227 ASSERT(hash);
228 unsigned newHash = hash | (!(hash + 1) << 31);
229 ASSERT(newHash);
230 ASSERT(newHash != 0xFFFFFFFF);
231 return newHash;
237 namespace WTF {
239 /*template<> struct HashTraits<DOM::DOMString> : GenericHashTraits<DOM::DOMString> {
240 static const bool emptyValueIsZero = true;
241 static void constructDeletedValue(DOM::DOMString& slot) { new (&slot) DOM::DOMString(HashTableDeletedValue); }
242 static bool isDeletedValue(const DOM::DOMString& slot) { return slot.isHashTableDeletedValue(); }
243 };*/
247 #endif