fixed: auto_ptr -> unique_ptr
[opensg.git] / Source / External / google / sparsehash / densehashtable.h
bloba00584ca6815671ddf3cdc772bbceb2e1b240cc0
1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 // ---
31 // Author: Craig Silverstein
33 // A dense hashtable is a particular implementation of
34 // a hashtable: one that is meant to minimize memory allocation.
35 // It does this by using an array to store all the data. We
36 // steal a value from the key space to indicate "empty" array
37 // elements (ie indices where no item lives) and another to indicate
38 // "deleted" elements.
40 // (Note it is possible to change the value of the delete key
41 // on the fly; you can even remove it, though after that point
42 // the hashtable is insert_only until you set it again. The empty
43 // value however can't be changed.)
45 // This code is possibly a bit faster if the empty value is 0.
47 // To minimize allocation and pointer overhead, we use internal
48 // probing, in which the hashtable is a single table, and collisions
49 // are resolved by trying to insert again in another bucket. The
50 // most cache-efficient internal probing schemes are linear probing
51 // (which suffers, alas, from clumping) and quadratic probing, which
52 // is what we implement by default.
54 // Type requirements: T must be a Plain Old Data Type (POD Type), since
55 // we use placement new but never call a destructor. To ensure this,
56 // you must defined a __type_traits struct for T if it is not a basic
57 // type (see type_traits.h). Also, those imposed by the requirements
58 // of Random Access Container.
60 // You probably shouldn't use this code directly. Use
61 // <google/dense_hash_map> or <google/dense_hash_set> instead.
63 // You can change the following below:
64 // HT_OCCUPANCY_FLT -- how full before we double size
65 // HT_EMPTY_FLT -- how empty before we halve size
66 // HT_MIN_BUCKETS -- default smallest bucket size
68 // How to decide what values to use?
69 // HT_EMPTY_FLT's default of .4 * OCCUPANCY_FLT, is probably good.
70 // HT_MIN_BUCKETS is probably unnecessary since you can specify
71 // (indirectly) the starting number of buckets at construct-time.
72 // For HT_OCCUPANCY_FLT, you can use this chart to try to trade-off
73 // expected lookup time to the space taken up. By default, this
74 // code uses quadratic probing, though you can change it to linear
75 // via _JUMP below if you really want to.
77 // From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
78 // NUMBER OF PROBES / LOOKUP Successful Unsuccessful
79 // Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L)
80 // Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)2]/2
82 // -- HT_OCCUPANCY_FLT -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99
83 // QUADRATIC COLLISION RES.
84 // probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11
85 // probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6
86 // LINEAR COLLISION RES.
87 // probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5
88 // probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0
90 #ifndef _DENSEHASHTABLE_H_
91 #define _DENSEHASHTABLE_H_
93 // The probing method
94 // Linear probing
95 // #define JUMP_(key, num_probes) ( 1 )
96 // Quadratic-ish probing
97 #define JUMP_(key, num_probes) ( num_probes )
100 // Hashtable class, used to implement the hashed associative containers
101 // hash_set and hash_map.
103 // !!! DR changed some <google/...> to "..." due to include path problems...
// !!! DR changed some <google/...> to "..." due to include path problems...
#include "google_config.h"
#include <cassert>
#include <cstdio>        // for FILE, fwrite, fread
#include <cstdlib>       // for malloc, realloc, free
#include <cstring>       // for memset
#include <algorithm>     // For swap(), eg
108 _START_GOOGLE_NAMESPACE_
110 using STL_NAMESPACE::pair;
// Forward declarations so the iterator types can name the table type
// (and vice versa) before the full definitions appear below.
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_iterator;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_const_iterator;
122 // We're just an array, but we need to skip over empty and deleted elements
123 template <class V, class K, class HF, class ExK, class EqK, class A>
124 struct dense_hashtable_iterator {
125 public:
126 typedef dense_hashtable<V,K,HF,ExK,EqK,A> table_type;
127 typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A> iterator;
128 typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
130 #ifdef UNDERSTANDS_ITERATOR_TAGS
131 typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
132 #endif
133 typedef V value_type;
134 typedef ptrdiff_t difference_type;
135 typedef size_t size_type;
136 typedef V& reference; // Value
137 typedef V* pointer;
139 // "Real" constructor and default constructor
140 dense_hashtable_iterator(const table_type *h,
141 pointer it, pointer it_end, bool advance)
142 : ht(h), pos(it), end(it_end) {
143 if (advance) advance_past_empty_and_deleted();
145 dense_hashtable_iterator() { }
146 // The default destructor is fine; we don't define one
147 // The default operator= is fine; we don't define one
149 // Happy dereferencer
150 reference operator*() const { return *pos; }
151 pointer operator->() const { return &(operator*()); }
153 // Arithmetic. The only hard part is making sure that
154 // we're not on an empty or marked-deleted array element
155 void advance_past_empty_and_deleted() {
156 while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
157 ++pos;
159 iterator& operator++() {
160 assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
162 iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
164 // Comparison.
165 bool operator==(const iterator& it) const { return pos == it.pos; }
166 bool operator!=(const iterator& it) const { return pos != it.pos; }
169 // The actual data
170 const table_type *ht;
171 pointer pos, end;
175 // Now do it all again, but with const-ness!
176 template <class V, class K, class HF, class ExK, class EqK, class A>
177 struct dense_hashtable_const_iterator {
178 public:
179 typedef dense_hashtable<V,K,HF,ExK,EqK,A> table_type;
180 typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A> iterator;
181 typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
183 #ifdef UNDERSTANDS_ITERATOR_TAGS
184 typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
185 #endif
186 typedef V value_type;
187 typedef ptrdiff_t difference_type;
188 typedef size_t size_type;
189 typedef const V& reference; // Value
190 typedef const V* pointer;
192 // "Real" constructor and default constructor
193 dense_hashtable_const_iterator(const table_type *h,
194 pointer it, pointer it_end, bool advance)
195 : ht(h), pos(it), end(it_end) {
196 if (advance) advance_past_empty_and_deleted();
198 dense_hashtable_const_iterator() { }
199 // This lets us convert regular iterators to const iterators
200 dense_hashtable_const_iterator(const iterator &it)
201 : ht(it.ht), pos(it.pos), end(it.end) { }
202 // The default destructor is fine; we don't define one
203 // The default operator= is fine; we don't define one
205 // Happy dereferencer
206 reference operator*() const { return *pos; }
207 pointer operator->() const { return &(operator*()); }
209 // Arithmetic. The only hard part is making sure that
210 // we're not on an empty or marked-deleted array element
211 void advance_past_empty_and_deleted() {
212 while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
213 ++pos;
215 const_iterator& operator++() {
216 assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
218 const_iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
220 // Comparison.
221 bool operator==(const const_iterator& it) const { return pos == it.pos; }
222 bool operator!=(const const_iterator& it) const { return pos != it.pos; }
225 // The actual data
226 const table_type *ht;
227 pointer pos, end;
230 template <class Value, class Key, class HashFcn,
231 class ExtractKey, class EqualKey, class Alloc>
232 class dense_hashtable {
233 public:
234 typedef Key key_type;
235 typedef Value value_type;
236 typedef HashFcn hasher;
237 typedef EqualKey key_equal;
239 typedef size_t size_type;
240 typedef ptrdiff_t difference_type;
241 typedef value_type* pointer;
242 typedef const value_type* const_pointer;
243 typedef value_type& reference;
244 typedef const value_type& const_reference;
245 typedef dense_hashtable_iterator<Value, Key, HashFcn,
246 ExtractKey, EqualKey, Alloc>
247 iterator;
249 typedef dense_hashtable_const_iterator<Value, Key, HashFcn,
250 ExtractKey, EqualKey, Alloc>
251 const_iterator;
253 // How full we let the table get before we resize. Knuth says .8 is
254 // good -- higher causes us to probe too much, though saves memory
255 static const float HT_OCCUPANCY_FLT; // = 0.8;
257 // How empty we let the table get before we resize lower.
258 // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
259 static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT
261 // Minimum size we're willing to let hashtables be.
262 // Must be a power of two, and at least 4.
263 // Note, however, that for a given hashtable, the minimum size is
264 // determined by the first constructor arg, and may be >HT_MIN_BUCKETS.
265 static const size_t HT_MIN_BUCKETS = 32;
268 // ITERATOR FUNCTIONS
269 iterator begin() { return iterator(this, table,
270 table + num_buckets, true); }
271 iterator end() { return iterator(this, table + num_buckets,
272 table + num_buckets, true); }
273 const_iterator begin() const { return const_iterator(this, table,
274 table+num_buckets,true);}
275 const_iterator end() const { return const_iterator(this, table + num_buckets,
276 table+num_buckets,true);}
278 // ACCESSOR FUNCTIONS for the things we templatize on, basically
279 hasher hash_funct() const { return hash; }
280 key_equal key_eq() const { return equals; }
282 // Annoyingly, we can't copy values around, because they might have
283 // const components (they're probably pair<const X, Y>). We use
284 // placement new to get around this. Arg.
285 private:
286 void set_value(value_type* dst, const value_type src) {
287 new(dst) value_type(src);
290 void set_key(key_type* dst, const key_type src) {
291 new(dst) key_type(src); // used for set_deleted_key(), etc
294 // DELETE HELPER FUNCTIONS
295 // This lets the user describe a key that will indicate deleted
296 // table entries. This key should be an "impossible" entry --
297 // if you try to insert it for real, you won't be able to retrieve it!
298 // (NB: while you pass in an entire value, only the key part is looked
299 // at. This is just because I don't know how to assign just a key.)
300 private:
301 void squash_deleted() { // gets rid of any deleted entries we have
302 if ( num_deleted ) { // get rid of deleted before writing
303 dense_hashtable tmp(*this); // copying will get rid of deleted
304 swap(tmp); // now we are tmp
306 assert(num_deleted == 0);
309 public:
310 void set_deleted_key(const key_type &key) {
311 // It's only safe to change what "deleted" means if we purge deleted guys
312 squash_deleted();
313 use_deleted = true;
314 set_key(&delkey, key); // save the key
316 void clear_deleted_key() {
317 squash_deleted();
318 use_deleted = false;
321 // These are public so the iterators can use them
322 // True if the item at position bucknum is "deleted" marker
323 bool test_deleted(size_type bucknum) const {
324 // The num_deleted test is crucial for read(): after read(), the ht values
325 // are garbage, and we don't want to think some of them are deleted.
326 return (use_deleted && num_deleted > 0 &&
327 equals(delkey, get_key(table[bucknum])));
329 bool test_deleted(const iterator &it) const {
330 return (use_deleted && num_deleted > 0 &&
331 equals(delkey, get_key(*it)));
333 bool test_deleted(const const_iterator &it) const {
334 return (use_deleted && num_deleted > 0 &&
335 equals(delkey, get_key(*it)));
337 // Set it so test_deleted is true. true if object didn't used to be deleted
338 // See below (at erase()) to explain why we allow const_iterators
339 bool set_deleted(const_iterator &it) {
340 assert(use_deleted); // bad if set_deleted_key() wasn't called
341 bool retval = !test_deleted(it);
342 set_key(const_cast<key_type*>(&get_key(*it)), delkey);
343 return retval;
345 // Set it so test_deleted is false. true if object used to be deleted
346 bool clear_deleted(const_iterator &it) {
347 assert(use_deleted); // bad if set_deleted_key() wasn't called
348 // happens automatically when we assign something else in its place
349 return test_deleted(it);
352 // EMPTY HELPER FUNCTIONS
353 // This lets the user describe a key that will indicate empty (unused)
354 // table entries. This key should be an "impossible" entry --
355 // if you try to insert it for real, you won't be able to retrieve it!
356 // (NB: while you pass in an entire value, only the key part is looked
357 // at. This is just because I don't know how to assign just a key.)
358 public:
359 // These are public so the iterators can use them
360 // True if the item at position bucknum is "empty" marker
361 bool test_empty(size_type bucknum) const {
362 assert(use_empty); // we always need to know what's empty!
363 return equals(emptykey, get_key(table[bucknum]));
365 bool test_empty(const iterator &it) const {
366 assert(use_empty); // we always need to know what's empty!
367 return equals(emptykey, get_key(*it));
369 bool test_empty(const const_iterator &it) const {
370 assert(use_empty); // we always need to know what's empty!
371 return equals(emptykey, get_key(*it));
374 private:
375 // You can either set a range empty or an individual element
376 void set_empty(size_type bucknum) {
377 assert(use_empty);
378 set_key(const_cast<key_type*>(&get_key(table[bucknum])), emptykey);
380 void set_empty(size_type buckstart, size_type buckend) {
381 assert(use_empty);
382 if ( empty_is_zero ) // we can empty a lot of buckets at once
383 memset(table + buckstart, 0, (buckend-buckstart) * sizeof(*table));
384 else
385 for ( ; buckstart < buckend; buckstart++ )
386 set_empty(buckstart);
389 public:
390 void set_empty_key(const key_type &key) {
391 // Once you set the empty key, you can't change it
392 assert(!use_empty);
393 use_empty = true;
394 set_key(&emptykey, key);
395 empty_is_zero = true; // true if key is all 0's
396 for ( size_t i = 0; i < sizeof(emptykey); ++i )
397 if ( (reinterpret_cast<const char *>(&emptykey))[i] != 0 ) {
398 empty_is_zero = false; // won't be able to optimize :-(
399 break;
401 empty_is_zero = false; // empty heuristic seems to hurt
403 assert(!table); // must set before first use
404 // num_buckets was set in constructor even though table was NULL
405 table = static_cast<value_type *>(malloc(num_buckets * sizeof(*table)));
406 assert(table);
407 set_empty(0, num_buckets);
410 // FUNCTIONS CONCERNING SIZE
411 public:
412 size_type size() const { return num_elements - num_deleted; }
413 // Buckets are always a power of 2
414 size_type max_size() const { return (size_type(-1) >> 1U) + 1; }
415 bool empty() const { return size() == 0; }
416 size_type bucket_count() const { return num_buckets; }
417 size_type max_bucket_count() const { return max_size(); }
419 private:
420 // Because of the above, size_type(-1) is never legal; use it for errors
421 static const size_type ILLEGAL_BUCKET = size_type(-1);
423 private:
424 // This is the smallest size a hashtable can be without being too crowded
425 // If you like, you can give a min #buckets as well as a min #elts
426 size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
427 size_type sz = HT_MIN_BUCKETS; // min buckets allowed
428 while ( sz < min_buckets_wanted || num_elts >= sz * HT_OCCUPANCY_FLT )
429 sz *= 2;
430 return sz;
433 // Used after a string of deletes
434 void maybe_shrink() {
435 assert(num_elements >= num_deleted);
436 assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
437 assert(bucket_count() >= HT_MIN_BUCKETS);
439 if ( (num_elements-num_deleted) <= shrink_threshold &&
440 bucket_count() > HT_MIN_BUCKETS ) {
441 size_type sz = bucket_count() / 2; // find how much we should shrink
442 while ( sz > HT_MIN_BUCKETS &&
443 (num_elements - num_deleted) <= sz * HT_EMPTY_FLT )
444 sz /= 2; // stay a power of 2
445 dense_hashtable tmp(*this, sz); // Do the actual resizing
446 swap(tmp); // now we are tmp
448 consider_shrink = false; // because we just considered it
451 // We'll let you resize a hashtable -- though this makes us copy all!
452 // When you resize, you say, "make it big enough for this many more elements"
453 void resize_delta(size_type delta, size_type min_buckets_wanted = 0) {
454 if ( consider_shrink ) // see if lots of deletes happened
455 maybe_shrink();
456 if ( bucket_count() > min_buckets_wanted &&
457 (num_elements + delta) <= enlarge_threshold )
458 return; // we're ok as we are
460 const size_type resize_to = min_size(num_elements + delta,
461 min_buckets_wanted);
462 if ( resize_to > bucket_count() ) { // we don't have enough buckets
463 dense_hashtable tmp(*this, resize_to);
464 swap(tmp); // now we are tmp
468 // Used to actually do the rehashing when we grow/shrink a hashtable
469 void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted = 0) {
470 clear(); // clear table, set num_deleted to 0
472 // If we need to change the size of our table, do it now
473 const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
474 if ( resize_to > bucket_count() ) { // we don't have enough buckets
475 table = static_cast<value_type *>(realloc(table, resize_to * sizeof(*table)));
476 assert(table != NULL);
477 set_empty(num_buckets, resize_to); // empty everything between them
478 num_buckets = resize_to;
479 reset_thresholds();
482 // We use a normal iterator to get non-deleted bcks from ht
483 // We could use insert() here, but since we know there are
484 // no duplicates and no deleted items, we can be more efficient
485 assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two
486 for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
487 size_type num_probes = 0; // how many times we've probed
488 size_type bucknum;
489 const size_type bucket_count_minus_one = bucket_count() - 1;
490 for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
491 !test_empty(bucknum); // not empty
492 bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
493 ++num_probes;
494 assert(num_probes < bucket_count()); // or else the hashtable is full
496 set_value(&table[bucknum], *it); // copies the value to here
497 num_elements++;
501 // Required by the spec for hashed associative container
502 public:
503 // Though the docs say this should be num_buckets, I think it's much
504 // more useful as req_elements. As a special feature, calling with
505 // req_elements==0 will cause us to shrink if we can, saving space.
506 void resize(size_type req_elements) { // resize to this or larger
507 if ( consider_shrink || req_elements == 0 )
508 maybe_shrink();
509 if ( req_elements > num_elements )
510 return resize_delta(req_elements - num_elements, 0);
514 // CONSTRUCTORS -- as required by the specs, we take a size,
515 // but also let you specify a hashfunction, key comparator,
516 // and key extractor. We also define a copy constructor and =.
517 // DESTRUCTOR -- needs to free the table
518 explicit dense_hashtable(size_type n = 0,
519 const HashFcn& hf = HashFcn(),
520 const EqualKey& eql = EqualKey(),
521 const ExtractKey& ext = ExtractKey())
522 : hash(hf), equals(eql), get_key(ext), num_deleted(0),
523 use_deleted(false), use_empty(false),
524 empty_is_zero(false), delkey(), emptykey(),
525 table(NULL), num_buckets(min_size(0, n)), num_elements(0) {
526 // table is NULL until emptykey is set. However, we set num_buckets
527 // here so we know how much space to allocate once emptykey is set
528 reset_thresholds();
531 // As a convenience for resize(), we allow an optional second argument
532 // which lets you make this new hashtable a different size than ht
533 dense_hashtable(const dense_hashtable& ht, size_type min_buckets_wanted = 0)
534 : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_deleted(0),
535 use_deleted(ht.use_deleted), use_empty(ht.use_empty),
536 empty_is_zero(ht.empty_is_zero), delkey(ht.delkey), emptykey(ht.emptykey),
537 table(NULL), num_buckets(min_size(0, min_buckets_wanted)),
538 num_elements(0) {
539 reset_thresholds();
540 copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries
543 dense_hashtable& operator= (const dense_hashtable& ht) {
544 if (&ht == this) return *this; // don't copy onto ourselves
545 clear();
546 hash = ht.hash;
547 equals = ht.equals;
548 get_key = ht.get_key;
549 use_deleted = ht.use_deleted;
550 use_empty = ht.use_empty;
551 empty_is_zero = ht.empty_is_zero;
552 set_key(&delkey, ht.delkey);
553 set_key(&emptykey, ht.emptykey);
554 copy_from(ht); // sets num_deleted to 0 too
555 return *this;
558 ~dense_hashtable() {
559 if (table) free(table);
562 // Many STL algorithms use swap instead of copy constructors
563 void swap(dense_hashtable& ht) {
564 STL_NAMESPACE::swap(hash, ht.hash);
565 STL_NAMESPACE::swap(equals, ht.equals);
566 STL_NAMESPACE::swap(get_key, ht.get_key);
567 STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
568 STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
569 STL_NAMESPACE::swap(use_empty, ht.use_empty);
570 STL_NAMESPACE::swap(empty_is_zero, ht.empty_is_zero);
571 { key_type tmp; // for annoying reasons, swap() doesn't work
572 set_key(&tmp, delkey);
573 set_key(&delkey, ht.delkey);
574 set_key(&ht.delkey, tmp);
576 { key_type tmp; // for annoying reasons, swap() doesn't work
577 set_key(&tmp, emptykey);
578 set_key(&emptykey, ht.emptykey);
579 set_key(&ht.emptykey, tmp);
581 STL_NAMESPACE::swap(table, ht.table);
582 STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
583 STL_NAMESPACE::swap(num_elements, ht.num_elements);
584 reset_thresholds();
585 ht.reset_thresholds();
588 // It's always nice to be able to clear a table without deallocating it
589 void clear() {
590 num_buckets = min_size(0,0); // our new size
591 reset_thresholds();
592 table = static_cast<value_type *>(realloc(table, num_buckets * sizeof(*table)));
593 assert(table);
594 set_empty(0, num_buckets);
595 num_elements = 0;
596 num_deleted = 0;
600 // LOOKUP ROUTINES
601 private:
602 // Returns a pair of positions: 1st where the object is, 2nd where
603 // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET
604 // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
605 // Note: because of deletions where-to-insert is not trivial: it's the
606 // first deleted bucket we see, as long as we don't find the key later
607 pair<size_type, size_type> find_position(const key_type &key) const {
608 size_type num_probes = 0; // how many times we've probed
609 const size_type bucket_count_minus_one = bucket_count() - 1;
610 size_type bucknum = hash(key) & bucket_count_minus_one;
611 size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
612 while ( 1 ) { // probe until something happens
613 if ( test_empty(bucknum) ) { // bucket is empty
614 if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert
615 return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
616 else
617 return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
619 } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert
620 if ( insert_pos == ILLEGAL_BUCKET )
621 insert_pos = bucknum;
623 } else if ( equals(key, get_key(table[bucknum])) ) {
624 return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
626 ++num_probes; // we're doing another probe
627 bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
628 assert(num_probes < bucket_count()); // don't probe too many times!
632 public:
633 iterator find(const key_type& key) {
634 if ( size() == 0 ) return end();
635 pair<size_type, size_type> pos = find_position(key);
636 if ( pos.first == ILLEGAL_BUCKET ) // alas, not there
637 return end();
638 else
639 return iterator(this, table + pos.first, table + num_buckets, false);
642 const_iterator find(const key_type& key) const {
643 if ( size() == 0 ) return end();
644 pair<size_type, size_type> pos = find_position(key);
645 if ( pos.first == ILLEGAL_BUCKET ) // alas, not there
646 return end();
647 else
648 return const_iterator(this, table + pos.first, table+num_buckets, false);
651 // Counts how many elements have key key. For maps, it's either 0 or 1.
652 size_type count(const key_type &key) const {
653 pair<size_type, size_type> pos = find_position(key);
654 return pos.first == ILLEGAL_BUCKET ? 0 : 1;
657 // Likewise, equal_range doesn't really make sense for us. Oh well.
658 pair<iterator,iterator> equal_range(const key_type& key) {
659 const iterator pos = find(key); // either an iterator or end
660 return pair<iterator,iterator>(pos, pos);
662 pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
663 const const_iterator pos = find(key); // either an iterator or end
664 return pair<iterator,iterator>(pos, pos);
668 // INSERTION ROUTINES
669 private:
670 // If you know *this is big enough to hold obj, use this routine
671 pair<iterator, bool> insert_noresize(const value_type& obj) {
672 const pair<size_type,size_type> pos = find_position(get_key(obj));
673 if ( pos.first != ILLEGAL_BUCKET) { // object was already there
674 return pair<iterator,bool>(iterator(this, table + pos.first,
675 table + num_buckets, false),
676 false); // false: we didn't insert
677 } else { // pos.second says where to put it
678 if ( test_deleted(pos.second) ) { // just replace if it's been del.
679 const_iterator delpos(this, table + pos.second, // shrug:
680 table + num_buckets, false);// shouldn't need const
681 clear_deleted(delpos);
682 assert( num_deleted > 0);
683 --num_deleted; // used to be, now it isn't
684 } else {
685 ++num_elements; // replacing an empty bucket
687 set_value(&table[pos.second], obj);
688 return pair<iterator,bool>(iterator(this, table + pos.second,
689 table + num_buckets, false),
690 true); // true: we did insert
694 public:
695 // This is the normal insert routine, used by the outside world
696 pair<iterator, bool> insert(const value_type& obj) {
697 resize_delta(1); // adding an object, grow if need be
698 return insert_noresize(obj);
701 // When inserting a lot at a time, we specialize on the type of iterator
702 template <class InputIterator>
703 void insert(InputIterator f, InputIterator l) {
704 // specializes on iterator type
705 insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
708 // Iterator supports operator-, resize before inserting
709 template <class ForwardIterator>
710 void insert(ForwardIterator f, ForwardIterator l,
711 STL_NAMESPACE::forward_iterator_tag) {
712 size_type n = STL_NAMESPACE::distance(f, l); // TODO(csilvers): standard?
713 resize_delta(n);
714 for ( ; n > 0; --n, ++f)
715 insert_noresize(*f);
718 // Arbitrary iterator, can't tell how much to resize
719 template <class InputIterator>
720 void insert(InputIterator f, InputIterator l,
721 STL_NAMESPACE::input_iterator_tag) {
722 for ( ; f != l; ++f)
723 insert(*f);
726 // DELETION ROUTINES
727 size_type erase(const key_type& key) {
728 const_iterator pos = find(key); // shrug: shouldn't need to be const
729 if ( pos != end() ) {
730 assert(!test_deleted(pos)); // or find() shouldn't have returned it
731 set_deleted(pos);
732 ++num_deleted;
733 consider_shrink = true; // will think about shrink after next insert
734 return 1; // because we deleted one thing
735 } else {
736 return 0; // because we deleted nothing
740 // This is really evil: really it should be iterator, not const_iterator.
741 // But...the only reason keys are const is to allow lookup.
742 // Since that's a moot issue for deleted keys, we allow const_iterators
743 void erase(const_iterator pos) {
744 if ( pos == end() ) return; // sanity check
745 if ( set_deleted(pos) ) { // true if object has been newly deleted
746 ++num_deleted;
747 consider_shrink = true; // will think about shrink after next insert
751 void erase(const_iterator f, const_iterator l) {
752 for ( ; f != l; ++f) {
753 if ( set_deleted(f) ) // should always be true
754 ++num_deleted;
756 consider_shrink = true; // will think about shrink after next insert
760 // COMPARISON
761 bool operator==(const dense_hashtable& ht) const {
762 // We really want to check that the hash functions are the same
763 // but alas there's no way to do this. We just hope.
764 return ( num_deleted == ht.num_deleted && table == ht.table );
766 bool operator!=(const dense_hashtable& ht) const {
767 return !(*this == ht);
771 // I/O
772 // We support reading and writing hashtables to disk. Alas, since
773 // I don't know how to write a hasher or key_equal, you have to make
774 // sure everything but the table is the same. We compact before writing
776 // NOTE: These functions are currently TODO. They've not been implemented.
777 bool write_metadata(FILE *fp) {
778 squash_deleted(); // so we don't have to worry about delkey
779 return false; // TODO
782 bool read_metadata(FILE *fp) {
783 num_deleted = 0; // since we got rid before writing
784 assert(use_empty); // have to set this before calling us
785 if (table) free(table); // we'll make our own
786 // TODO: read magic number
787 // TODO: read num_buckets
788 reset_thresholds();
789 table = static_cast<value_type *>(malloc(num_buckets * sizeof(*table)));
790 assert(table);
791 set_empty(0, num_buckets);
792 // TODO: read num_elements
793 for ( size_type i = 0; i < num_elements; ++i ) {
794 // TODO: read bucket_num
795 // TODO: set with non-empty, non-deleted value
797 return false; // TODO
800 // If your keys and values are simple enough, we can write them
801 // to disk for you. "simple enough" means no pointers.
802 // However, we don't try to normalize endianness
803 bool write_nopointer_data(FILE *fp) const {
804 for ( const_iterator it = begin(); it != end(); ++it ) {
805 // TODO: skip empty/deleted values
806 if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false;
808 return false; // TODO
811 // When reading, we have to override the potential const-ness of *it
812 bool read_nopointer_data(FILE *fp) {
813 for ( iterator it = begin(); it != end(); ++it ) {
814 // TODO: skip empty/deleted values
815 if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
816 return false;
818 return false; // TODO
821 private:
822 // We need to enforce that our value_type is a Plain Old Data Type
823 // (so we know realloc and memmove will work). We use traits to
824 // enforce this. The following gives a compile-time error if
825 // is_POD_type is false (which is the default for user types).
827 // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
828 // and if so tell the compiler via code similar to this:
829 // template<> struct __type_traits<classname> {
830 // typedef __true_type has_trivial_default_constructor;
831 // typedef __true_type has_trivial_copy_constructor;
832 // typedef __true_type has_trivial_assignment_operator;
833 // typedef __true_type has_trivial_destructor;
834 // typedef __true_type is_POD_type;
835 // };
837 // If this is part of a hash_map, you need to make sure both the
838 // Key and Value types are POD types, if they're user-defined.
839 #ifdef UNDERSTANDS_TYPE_TRAITS
840 static __true_type * const enforce_pod;
841 #endif
843 // The actual data
844 hasher hash; // required by hashed_associative_container
845 key_equal equals;
846 ExtractKey get_key;
847 size_type num_deleted; // how many occupied buckets are marked deleted
848 bool use_deleted; // false until delkey has been set
849 bool use_empty; // you must do this before you start
850 bool empty_is_zero; // can optimize this case when emptying
851 key_type delkey; // which key marks deleted entries
852 key_type emptykey; // which key marks unused entries
853 value_type *table;
854 size_type num_buckets;
855 size_type num_elements;
856 size_type shrink_threshold; // num_buckets * HT_EMPTY_FLT
857 size_type enlarge_threshold; // num_buckets * HT_OCCUPANCY_FLT
858 bool consider_shrink; // true if we should try to shrink before next insert
860 void reset_thresholds() {
861 enlarge_threshold = static_cast<size_type>(num_buckets*HT_OCCUPANCY_FLT);
862 shrink_threshold = static_cast<size_type>(num_buckets*HT_EMPTY_FLT);
863 consider_shrink = false; // whatever caused us to reset already considered
867 // We need a global swap as well
868 template <class V, class K, class HF, class ExK, class EqK, class A>
869 inline void swap(dense_hashtable<V,K,HF,ExK,EqK,A> &x,
870 dense_hashtable<V,K,HF,ExK,EqK,A> &y) {
871 x.swap(y);
// Out-of-class definition of the POD-enforcing static member: the
// static_cast only compiles when __type_traits<value_type>::is_POD_type
// is __true_type, giving a compile-time POD check.
#ifdef UNDERSTANDS_TYPE_TRAITS
template <class V, class K, class HF, class ExK, class EqK, class A>
__true_type * const dense_hashtable<V,K,HF,ExK,EqK,A>::enforce_pod =
  static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
#endif

// Probing macro is private to this header; don't leak it to includers.
#undef JUMP_
882 template <class V, class K, class HF, class ExK, class EqK, class A>
883 const typename dense_hashtable<V,K,HF,ExK,EqK,A>::size_type
884 dense_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;
886 // How full we let the table get before we resize. Knuth says .8 is
887 // good -- higher causes us to probe too much, though saves memory
888 template <class V, class K, class HF, class ExK, class EqK, class A>
889 const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.5f;
891 // How empty we let the table get before we resize lower.
892 // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
893 template <class V, class K, class HF, class ExK, class EqK, class A>
894 const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT = 0.4 *
895 dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;
897 _END_GOOGLE_NAMESPACE_
899 #endif /* _DENSEHASHTABLE_H_ */