Speech bubbles can point down right.
[scummvm-innocent.git] / common / hashmap.h
blobf5059a4bcf3627e18fce73721aa7a201c1e8fd59
1 /* ScummVM - Graphic Adventure Engine
3 * ScummVM is the legal property of its developers, whose names
4 * are too numerous to list here. Please refer to the COPYRIGHT
5 * file distributed with this source distribution.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * $URL$
22 * $Id$
26 // The hash map (associative array) implementation in this file is
27 // based on the PyDict implementation of CPython.
29 #ifndef COMMON_HASHMAP_H
30 #define COMMON_HASHMAP_H
32 #include "common/func.h"
33 #include "common/str.h"
34 #include "common/util.h"
36 #define USE_HASHMAP_MEMORY_POOL
37 #ifdef USE_HASHMAP_MEMORY_POOL
38 #include "common/memorypool.h"
39 #endif
41 namespace Common {
43 // Enable the following #define if you want to check how many collisions the
44 // code produces (many collisions indicate either a bad hash function, or a
45 // hash table that is too small).
46 //#define DEBUG_HASH_COLLISIONS
49 /**
50 * HashMap<Key,Val> maps objects of type Key to objects of type Val.
51 * For each used Key type, we need an "uint hashit(Key,uint)" function
52 * that computes a hash for the given Key object and returns it as an
53 * an integer from 0 to hashsize-1, and also an "equality functor".
54 * that returns true if if its two arguments are to be considered
55 * equal. Also, we assume that "=" works on Val objects for assignment.
57 * If aa is an HashMap<Key,Val>, then space is allocated each time aa[key] is
58 * referenced, for a new key. If the object is const, then an assertion is
59 * triggered instead. Hence if you are not sure whether a key is contained in
60 * the map, use contains() first to check for its presence.
62 template<class Key, class Val, class HashFunc = Hash<Key>, class EqualFunc = EqualTo<Key> >
63 class HashMap {
64 private:
65 #if defined (PALMOS_MODE)
66 public:
67 #endif
69 typedef HashMap<Key, Val, HashFunc, EqualFunc> HM_t;
71 struct Node {
72 const Key _key;
73 Val _value;
74 explicit Node(const Key &key) : _key(key), _value() {}
75 Node() : _key(), _value() {}
78 enum {
79 HASHMAP_PERTURB_SHIFT = 5,
80 HASHMAP_MIN_CAPACITY = 16,
82 // The quotient of the next two constants controls how much the
83 // internal storage of the hashmap may fill up before being
84 // increased automatically.
85 // Note: the quotient of these two must be between and different
86 // from 0 and 1.
87 HASHMAP_LOADFACTOR_NUMERATOR = 2,
88 HASHMAP_LOADFACTOR_DENOMINATOR = 3,
90 HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR
94 ObjectPool<Node, HASHMAP_MEMORYPOOL_SIZE> _nodePool;
96 Node *allocNode(const Key &key) {
97 return new (_nodePool) Node(key);
100 void freeNode(Node *node) {
101 if (node && node != &_dummyNode)
102 _nodePool.deleteChunk(node);
105 Node **_storage; // hashtable of size arrsize.
106 uint _mask; /**< Capacity of the HashMap minus one; must be a power of two of minus one */
107 uint _size;
108 uint _deleted; ///< Number of deleted elements (_dummyNodes)
110 HashFunc _hash;
111 EqualFunc _equal;
113 // Default value, returned by the const getVal.
114 const Val _defaultVal;
116 // Dummy node, used as marker for erased objects.
117 Node _dummyNode;
119 #ifdef DEBUG_HASH_COLLISIONS
120 mutable int _collisions, _lookups, _dummyHits;
121 #endif
123 void assign(const HM_t &map);
124 int lookup(const Key &key) const;
125 int lookupAndCreateIfMissing(const Key &key);
126 void expandStorage(uint newCapacity);
128 template<class T> friend class IteratorImpl;
131 * Simple HashMap iterator implementation.
133 template<class NodeType>
134 class IteratorImpl {
135 friend class HashMap;
136 template<class T> friend class IteratorImpl;
137 protected:
138 typedef const HashMap hashmap_t;
140 uint _idx;
141 hashmap_t *_hashmap;
143 protected:
144 IteratorImpl(uint idx, hashmap_t *hashmap) : _idx(idx), _hashmap(hashmap) {}
146 NodeType *deref() const {
147 assert(_hashmap != 0);
148 assert(_idx <= _hashmap->_mask);
149 Node *node = _hashmap->_storage[_idx];
150 assert(node != 0);
151 return node;
154 public:
155 IteratorImpl() : _idx(0), _hashmap(0) {}
156 template<class T>
157 IteratorImpl(const IteratorImpl<T> &c) : _idx(c._idx), _hashmap(c._hashmap) {}
159 NodeType &operator*() const { return *deref(); }
160 NodeType *operator->() const { return deref(); }
162 bool operator==(const IteratorImpl &iter) const { return _idx == iter._idx && _hashmap == iter._hashmap; }
163 bool operator!=(const IteratorImpl &iter) const { return !(*this == iter); }
165 IteratorImpl &operator++() {
166 assert(_hashmap);
167 do {
168 _idx++;
169 } while (_idx <= _hashmap->_mask && _hashmap->_storage[_idx] == 0);
170 if (_idx > _hashmap->_mask)
171 _idx = (uint)-1;
173 return *this;
176 IteratorImpl operator++(int) {
177 IteratorImpl old = *this;
178 operator ++();
179 return old;
183 public:
184 typedef IteratorImpl<Node> iterator;
185 typedef IteratorImpl<const Node> const_iterator;
187 HashMap();
188 HashMap(const HM_t &map);
189 ~HashMap();
191 HM_t &operator=(const HM_t &map) {
192 if (this == &map)
193 return *this;
195 // Remove the previous content and ...
196 clear();
197 delete[] _storage;
198 // ... copy the new stuff.
199 assign(map);
200 return *this;
203 bool contains(const Key &key) const;
205 Val &operator[](const Key &key);
206 const Val &operator[](const Key &key) const;
208 Val &getVal(const Key &key);
209 const Val &getVal(const Key &key) const;
210 void setVal(const Key &key, const Val &val);
212 void clear(bool shrinkArray = 0);
214 void erase(const Key &key);
216 uint size() const { return _size; }
218 iterator begin() {
219 // Find and return the first non-empty entry
220 for (uint ctr = 0; ctr <= _mask; ++ctr) {
221 if (_storage[ctr])
222 return iterator(ctr, this);
224 return end();
226 iterator end() {
227 return iterator((uint)-1, this);
230 const_iterator begin() const {
231 // Find and return the first non-empty entry
232 for (uint ctr = 0; ctr <= _mask; ++ctr) {
233 if (_storage[ctr])
234 return const_iterator(ctr, this);
236 return end();
238 const_iterator end() const {
239 return const_iterator((uint)-1, this);
242 iterator find(const Key &key) {
243 uint ctr = lookup(key);
244 if (_storage[ctr])
245 return iterator(ctr, this);
246 return end();
249 const_iterator find(const Key &key) const {
250 uint ctr = lookup(key);
251 if (_storage[ctr])
252 return const_iterator(ctr, this);
253 return end();
256 // TODO: insert() method?
258 bool empty() const {
259 return (_size == 0);
263 //-------------------------------------------------------
264 // HashMap functions
267 * Base constructor, creates an empty hashmap.
269 template<class Key, class Val, class HashFunc, class EqualFunc>
270 HashMap<Key, Val, HashFunc, EqualFunc>::HashMap()
272 // We have to skip _defaultVal() on PS2 to avoid gcc 3.2.2 ICE
274 #ifdef __PLAYSTATION2__
276 #else
277 : _defaultVal(), _dummyNode() {
278 #endif
279 _mask = HASHMAP_MIN_CAPACITY - 1;
280 _storage = new Node *[HASHMAP_MIN_CAPACITY];
281 assert(_storage != NULL);
282 memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
284 _size = 0;
285 _deleted = 0;
287 #ifdef DEBUG_HASH_COLLISIONS
288 _collisions = 0;
289 _lookups = 0;
290 _dummyHits = 0;
291 #endif
295 * Copy constructor, creates a full copy of the given hashmap.
296 * We must provide a custom copy constructor as we use pointers
297 * to heap buffers for the internal storage.
299 template<class Key, class Val, class HashFunc, class EqualFunc>
300 HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) :
301 _defaultVal(), _dummyNode() {
302 #ifdef DEBUG_HASH_COLLISIONS
303 _collisions = 0;
304 _lookups = 0;
305 _dummyHits = 0;
306 #endif
307 assign(map);
311 * Destructor, frees all used memory.
313 template<class Key, class Val, class HashFunc, class EqualFunc>
314 HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() {
315 for (uint ctr = 0; ctr <= _mask; ++ctr)
316 freeNode(_storage[ctr]);
318 delete[] _storage;
319 #ifdef DEBUG_HASH_COLLISIONS
320 extern void updateHashCollisionStats(int, int, int, int, int);
321 updateHashCollisionStats(_collisions, _dummyHits, _lookups, _mask+1, _size);
322 #endif
326 * Internal method for assigning the content of another HashMap
327 * to this one.
329 * @note We do *not* deallocate the previous storage here -- the caller is
330 * responsible for doing that!
332 template<class Key, class Val, class HashFunc, class EqualFunc>
333 void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) {
334 _mask = map._mask;
335 _storage = new Node *[_mask+1];
336 assert(_storage != NULL);
337 memset(_storage, 0, (_mask+1) * sizeof(Node *));
339 // Simply clone the map given to us, one by one.
340 _size = 0;
341 _deleted = 0;
342 for (uint ctr = 0; ctr <= _mask; ++ctr) {
343 if (map._storage[ctr] == &map._dummyNode) {
344 _storage[ctr] = &_dummyNode;
345 _deleted++;
346 } else if (map._storage[ctr] != NULL) {
347 _storage[ctr] = allocNode(map._storage[ctr]->_key);
348 _storage[ctr]->_value = map._storage[ctr]->_value;
349 _size++;
352 // Perform a sanity check (to help track down hashmap corruption)
353 assert(_size == map._size);
354 assert(_deleted == map._deleted);
358 template<class Key, class Val, class HashFunc, class EqualFunc>
359 void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) {
360 for (uint ctr = 0; ctr <= _mask; ++ctr) {
361 freeNode(_storage[ctr]);
362 _storage[ctr] = NULL;
365 #ifdef USE_HASHMAP_MEMORY_POOL
366 _nodePool.freeUnusedPages();
367 #endif
369 if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) {
370 delete[] _storage;
372 _mask = HASHMAP_MIN_CAPACITY;
373 _storage = new Node *[HASHMAP_MIN_CAPACITY];
374 assert(_storage != NULL);
375 memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *));
378 _size = 0;
379 _deleted = 0;
382 template<class Key, class Val, class HashFunc, class EqualFunc>
383 void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(uint newCapacity) {
384 assert(newCapacity > _mask+1);
386 #ifndef NDEBUG
387 const uint old_size = _size;
388 #endif
389 const uint old_mask = _mask;
390 Node **old_storage = _storage;
392 // allocate a new array
393 _size = 0;
394 _deleted = 0;
395 _mask = newCapacity - 1;
396 _storage = new Node *[newCapacity];
397 assert(_storage != NULL);
398 memset(_storage, 0, newCapacity * sizeof(Node *));
400 // rehash all the old elements
401 for (uint ctr = 0; ctr <= old_mask; ++ctr) {
402 if (old_storage[ctr] == NULL || old_storage[ctr] == &_dummyNode)
403 continue;
405 // Insert the element from the old table into the new table.
406 // Since we know that no key exists twice in the old table, we
407 // can do this slightly better than by calling lookup, since we
408 // don't have to call _equal().
409 const uint hash = _hash(old_storage[ctr]->_key);
410 uint idx = hash & _mask;
411 for (uint perturb = hash; _storage[idx] != NULL && _storage[idx] != &_dummyNode; perturb >>= HASHMAP_PERTURB_SHIFT) {
412 idx = (5 * idx + perturb + 1) & _mask;
415 _storage[idx] = old_storage[ctr];
416 _size++;
419 // Perform a sanity check: Old number of elements should match the new one!
420 // This check will fail if some previous operation corrupted this hashmap.
421 assert(_size == old_size);
423 delete[] old_storage;
425 return;
428 template<class Key, class Val, class HashFunc, class EqualFunc>
429 int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const {
430 const uint hash = _hash(key);
431 uint ctr = hash & _mask;
432 for (uint perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
433 if (_storage[ctr] == NULL)
434 break;
435 if (_storage[ctr] == &_dummyNode) {
436 #ifdef DEBUG_HASH_COLLISIONS
437 _dummyHits++;
438 #endif
439 } else if (_equal(_storage[ctr]->_key, key))
440 break;
442 ctr = (5 * ctr + perturb + 1) & _mask;
444 #ifdef DEBUG_HASH_COLLISIONS
445 _collisions++;
446 #endif
449 #ifdef DEBUG_HASH_COLLISIONS
450 _lookups++;
451 fprintf(stderr, "collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n",
452 _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
453 (const void *)this, _mask+1, _size);
454 #endif
456 return ctr;
459 template<class Key, class Val, class HashFunc, class EqualFunc>
460 int HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key &key) {
461 const uint hash = _hash(key);
462 uint ctr = hash & _mask;
463 const uint NONE_FOUND = _mask + 1;
464 uint first_free = NONE_FOUND;
465 bool found = false;
466 for (uint perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) {
467 if (_storage[ctr] == NULL)
468 break;
469 if (_storage[ctr] == &_dummyNode) {
470 #ifdef DEBUG_HASH_COLLISIONS
471 _dummyHits++;
472 #endif
473 if (first_free != _mask + 1)
474 first_free = ctr;
475 } else if (_equal(_storage[ctr]->_key, key)) {
476 found = true;
477 break;
480 ctr = (5 * ctr + perturb + 1) & _mask;
482 #ifdef DEBUG_HASH_COLLISIONS
483 _collisions++;
484 #endif
487 #ifdef DEBUG_HASH_COLLISIONS
488 _lookups++;
489 fprintf(stderr, "collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n",
490 _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups),
491 (const void *)this, _mask+1, _size);
492 #endif
494 if (!found && first_free != _mask + 1)
495 ctr = first_free;
497 if (!found) {
498 if (_storage[ctr])
499 _deleted--;
500 _storage[ctr] = allocNode(key);
501 assert(_storage[ctr] != NULL);
502 _size++;
504 // Keep the load factor below a certain threshold.
505 // Deleted nodes are also counted
506 uint capacity = _mask + 1;
507 if ((_size + _deleted) * HASHMAP_LOADFACTOR_DENOMINATOR >
508 capacity * HASHMAP_LOADFACTOR_NUMERATOR) {
509 capacity = capacity < 500 ? (capacity * 4) : (capacity * 2);
510 expandStorage(capacity);
511 ctr = lookup(key);
512 assert(_storage[ctr] != NULL);
516 return ctr;
520 template<class Key, class Val, class HashFunc, class EqualFunc>
521 bool HashMap<Key, Val, HashFunc, EqualFunc>::contains(const Key &key) const {
522 uint ctr = lookup(key);
523 return (_storage[ctr] != NULL);
526 template<class Key, class Val, class HashFunc, class EqualFunc>
527 Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) {
528 return getVal(key);
531 template<class Key, class Val, class HashFunc, class EqualFunc>
532 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::operator[](const Key &key) const {
533 return getVal(key);
536 template<class Key, class Val, class HashFunc, class EqualFunc>
537 Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) {
538 uint ctr = lookupAndCreateIfMissing(key);
539 assert(_storage[ctr] != NULL);
540 return _storage[ctr]->_value;
543 template<class Key, class Val, class HashFunc, class EqualFunc>
544 const Val &HashMap<Key, Val, HashFunc, EqualFunc>::getVal(const Key &key) const {
545 uint ctr = lookup(key);
546 if (_storage[ctr] != NULL)
547 return _storage[ctr]->_value;
548 else
549 return _defaultVal;
552 template<class Key, class Val, class HashFunc, class EqualFunc>
553 void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &val) {
554 uint ctr = lookupAndCreateIfMissing(key);
555 assert(_storage[ctr] != NULL);
556 _storage[ctr]->_value = val;
559 template<class Key, class Val, class HashFunc, class EqualFunc>
560 void HashMap<Key, Val, HashFunc, EqualFunc>::erase(const Key &key) {
562 uint ctr = lookup(key);
563 if (_storage[ctr] == NULL)
564 return;
566 // If we remove a key, we replace it with a dummy node.
567 freeNode(_storage[ctr]);
568 _storage[ctr] = &_dummyNode;
569 _size--;
570 _deleted++;
571 return;
574 } // End of namespace Common
576 #endif