// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Craig Silverstein
//
// A dense hashtable is a particular implementation of
// a hashtable: one that is meant to minimize memory allocation.
// It does this by using an array to store all the data.  We
// steal a value from the key space to indicate "empty" array
// elements (i.e. indices where no item lives) and another to indicate
// "deleted" elements.
//
// (Note it is possible to change the value of the delete key
// on the fly; you can even remove it, though after that point
// the hashtable is insert-only until you set it again.  The empty
// value, however, can't be changed.)
//
// This code is possibly a bit faster if the empty value is 0.
//
// To minimize allocation and pointer overhead, we use internal
// probing, in which the hashtable is a single table, and collisions
// are resolved by trying to insert again in another bucket.  The
// most cache-efficient internal probing schemes are linear probing
// (which suffers, alas, from clumping) and quadratic probing, which
// is what we implement by default.
//
// Type requirements: T must be a Plain Old Data Type (POD type), since
// we use placement new but never call a destructor.  To ensure this,
// you must define a __type_traits struct for T if it is not a basic
// type (see type_traits.h).  T must also satisfy the requirements
// imposed by Random Access Container.
//
// You probably shouldn't use this code directly.  Use
// <google/dense_hash_map> or <google/dense_hash_set> instead.
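//
// For example, a minimal sketch of intended usage via the wrapper
// classes (assuming the Google namespace macro expands to "google";
// the key values below are arbitrary "impossible" keys for the data):
//
//    #include <google/dense_hash_map>
//    google::dense_hash_map<int, int> ages;
//    ages.set_empty_key(-1);        // required before any use
//    ages.set_deleted_key(-2);      // required before any erase()
//    ages[19] = 99;
//    ages.erase(19);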
//
// You can change the following below:
//   HT_OCCUPANCY_FLT  -- how full before we double size
//   HT_EMPTY_FLT      -- how empty before we halve size
//   HT_MIN_BUCKETS    -- default smallest bucket size
//
// How to decide what values to use?
// HT_EMPTY_FLT's default of 0.4 * HT_OCCUPANCY_FLT is probably good.
// HT_MIN_BUCKETS is probably unnecessary since you can specify
// (indirectly) the starting number of buckets at construct-time.
// For HT_OCCUPANCY_FLT, you can use this chart to try to trade off
// expected lookup time against the space taken up.  By default, this
// code uses quadratic probing, though you can change it to linear
// via JUMP_ below if you really want to.
//
// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
// NUMBER OF PROBES / LOOKUP       Successful            Unsuccessful
// Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
// Linear collision resolution     [1+1/(1-L)]/2         [1+1/(1-L)^2]/2
//
// -- HT_OCCUPANCY_FLT --          0.10  0.50  0.60  0.75  0.80  0.90  0.99
// QUADRATIC COLLISION RES.
//    probes/successful lookup     1.05  1.44  1.62  2.01  2.21  2.85  5.11
//    probes/unsuccessful lookup   1.11  2.19  2.82  4.64  5.81  11.4  103.6
// LINEAR COLLISION RES.
//    probes/successful lookup     1.06  1.5   1.75  2.5   3.0   5.5   50.5
//    probes/unsuccessful lookup   1.12  2.5   3.6   8.5   13.0  50.0  5000.0
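//
// As a worked check of the quadratic row at L = 0.50:
//   probes/successful lookup = 1 - ln(1-0.5) - 0.5/2
//                            = 1 + 0.693 - 0.25  =~ 1.44
// which matches the 1.44 entry in the chart above.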

#ifndef _DENSEHASHTABLE_H_
#define _DENSEHASHTABLE_H_

// Linear probing:
// #define JUMP_(key, num_probes)    ( 1 )
// Quadratic-ish probing:
#define JUMP_(key, num_probes)    ( num_probes )
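
// With this definition of JUMP_, successive probes examine buckets at
// cumulative offsets 1, 2, 3, ... from the home bucket, i.e. bucket
//   (hash + i*(i+1)/2) mod num_buckets
// after i probes (triangular-number probing).  For the power-of-two
// table sizes used here, this sequence visits every bucket exactly once
// before repeating, so probing cannot cycle while free buckets remain.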

// Hashtable class, used to implement the hashed associative containers
// hash_set and hash_map.

// !!! DR changed some <google/...> to "..." due to include path problems...
#include "google_config.h"
#include <assert.h>
#include <stddef.h>             // for size_t, ptrdiff_t
#include <stdio.h>              // for FILE, fread(), fwrite()
#include <stdlib.h>             // for malloc(), realloc(), free()
#include <string.h>             // for memset()
#include <new>                  // for placement new
#include <algorithm>            // for swap(), e.g.
#include <utility>              // for pair<>

_START_GOOGLE_NAMESPACE_

using STL_NAMESPACE::pair;

template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_iterator;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_const_iterator;

// We're just an array, but we need to skip over empty and deleted elements
template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_iterator {
 public:
  typedef dense_hashtable<V,K,HF,ExK,EqK,A> table_type;
  typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A> iterator;
  typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;

#ifdef UNDERSTANDS_ITERATOR_TAGS
  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
#endif
  typedef V value_type;
  typedef ptrdiff_t difference_type;
  typedef size_t size_type;
  typedef V& reference;                       // Value
  typedef V* pointer;

  // "Real" constructor and default constructor
  dense_hashtable_iterator(const table_type *h,
                           pointer it, pointer it_end, bool advance)
    : ht(h), pos(it), end(it_end) {
    if (advance)  advance_past_empty_and_deleted();
  }
  dense_hashtable_iterator() { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on an empty or marked-deleted array element
  void advance_past_empty_and_deleted() {
    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
      ++pos;
  }
  iterator& operator++() {
    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // Comparison
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }

  // The actual data
  const table_type *ht;
  pointer pos, end;
};

// Now do it all again, but with const-ness!
template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_const_iterator {
 public:
  typedef dense_hashtable<V,K,HF,ExK,EqK,A> table_type;
  typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A> iterator;
  typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;

#ifdef UNDERSTANDS_ITERATOR_TAGS
  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
#endif
  typedef V value_type;
  typedef ptrdiff_t difference_type;
  typedef size_t size_type;
  typedef const V& reference;                 // Value
  typedef const V* pointer;

  // "Real" constructor and default constructor
  dense_hashtable_const_iterator(const table_type *h,
                                 pointer it, pointer it_end, bool advance)
    : ht(h), pos(it), end(it_end) {
    if (advance)  advance_past_empty_and_deleted();
  }
  dense_hashtable_const_iterator() { }
  // This lets us convert regular iterators to const iterators
  dense_hashtable_const_iterator(const iterator &it)
    : ht(it.ht), pos(it.pos), end(it.end) { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on an empty or marked-deleted array element
  void advance_past_empty_and_deleted() {
    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
      ++pos;
  }
  const_iterator& operator++() {
    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
  }
  const_iterator operator++(int) {
    const_iterator tmp(*this); ++*this; return tmp;
  }

  // Comparison
  bool operator==(const const_iterator& it) const { return pos == it.pos; }
  bool operator!=(const const_iterator& it) const { return pos != it.pos; }

  // The actual data
  const table_type *ht;
  pointer pos, end;
};

template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable {
 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;

  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef value_type* pointer;
  typedef const value_type* const_pointer;
  typedef value_type& reference;
  typedef const value_type& const_reference;
  typedef dense_hashtable_iterator<Value, Key, HashFcn,
                                   ExtractKey, EqualKey, Alloc>
      iterator;

  typedef dense_hashtable_const_iterator<Value, Key, HashFcn,
                                         ExtractKey, EqualKey, Alloc>
      const_iterator;

  // How full we let the table get before we resize.  Knuth says .8 is
  // good -- higher causes us to probe too much, though it saves memory.
  // (This version actually uses 0.5; see the definition at file bottom.)
  static const float HT_OCCUPANCY_FLT;  // = 0.5

  // How empty we let the table get before we resize lower.
  // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
  static const float HT_EMPTY_FLT;      // = 0.4 * HT_OCCUPANCY_FLT

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the minimum size is
  // determined by the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_t HT_MIN_BUCKETS = 32;

  // ITERATOR FUNCTIONS
  iterator begin()             { return iterator(this, table,
                                                 table + num_buckets, true); }
  iterator end()               { return iterator(this, table + num_buckets,
                                                 table + num_buckets, true); }
  const_iterator begin() const { return const_iterator(this, table,
                                                       table + num_buckets,
                                                       true); }
  const_iterator end() const   { return const_iterator(this,
                                                       table + num_buckets,
                                                       table + num_buckets,
                                                       true); }

  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  hasher hash_funct() const { return hash; }
  key_equal key_eq() const  { return equals; }

 private:
  // Annoyingly, we can't copy values around, because they might have
  // const components (they're probably pair<const X, Y>).  We use
  // placement new to get around this.  Arg.
  void set_value(value_type* dst, const value_type& src) {
    new(dst) value_type(src);
  }

  void set_key(key_type* dst, const key_type& src) {
    new(dst) key_type(src);             // used for set_deleted_key(), etc
  }

 public:
  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  void squash_deleted() {           // gets rid of any deleted entries we have
    if ( num_deleted ) {            // get rid of deleted before writing
      dense_hashtable tmp(*this);   // copying will get rid of deleted
      swap(tmp);                    // now we are tmp
    }
    assert(num_deleted == 0);
  }

 public:
  void set_deleted_key(const key_type &key) {
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    use_deleted = true;
    set_key(&delkey, key);          // save the key
  }
  void clear_deleted_key() {
    squash_deleted();
    use_deleted = false;
  }
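
  // For example (a sketch; any key that will never actually be inserted
  // can serve as the sentinel):
  //    ht.set_deleted_key(sentinel_key);   // erase() is now allowed
  //    ...
  //    ht.clear_deleted_key();             // table is insert-only again
  // Both calls compact the table first (via squash_deleted()), since
  // changing what "deleted" means is only safe once no deleted sentinels
  // remain in the buckets.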

  // These are public so the iterators can use them
  // True if the item at position bucknum is the "deleted" marker
  bool test_deleted(size_type bucknum) const {
    // The num_deleted test is crucial for read(): after read(), the ht values
    // are garbage, and we don't want to think some of them are deleted.
    return (use_deleted && num_deleted > 0 &&
            equals(delkey, get_key(table[bucknum])));
  }
  bool test_deleted(const iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(delkey, get_key(*it)));
  }
  bool test_deleted(const const_iterator &it) const {
    return (use_deleted && num_deleted > 0 &&
            equals(delkey, get_key(*it)));
  }
  // Set it so test_deleted is true.  true if object wasn't previously deleted
  // See below (at erase()) to explain why we allow const_iterators
  bool set_deleted(const_iterator &it) {
    assert(use_deleted);            // bad if set_deleted_key() wasn't called
    bool retval = !test_deleted(it);
    set_key(const_cast<key_type *>(&get_key(*it)), delkey);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted
  bool clear_deleted(const_iterator &it) {
    assert(use_deleted);            // bad if set_deleted_key() wasn't called
    // happens automatically when we assign something else in its place
    return test_deleted(it);
  }

  // EMPTY HELPER FUNCTIONS
  // This lets the user describe a key that will indicate empty (unused)
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 public:
  // These are public so the iterators can use them
  // True if the item at position bucknum is the "empty" marker
  bool test_empty(size_type bucknum) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(table[bucknum]));
  }
  bool test_empty(const iterator &it) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(*it));
  }
  bool test_empty(const const_iterator &it) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(*it));
  }

 private:
  // You can either set a range empty or an individual element
  void set_empty(size_type bucknum) {
    assert(use_empty);
    set_key(const_cast<key_type *>(&get_key(table[bucknum])), emptykey);
  }
  void set_empty(size_type buckstart, size_type buckend) {
    assert(use_empty);
    if ( empty_is_zero )            // we can empty a lot of buckets at once
      memset(table + buckstart, 0, (buckend-buckstart) * sizeof(*table));
    else
      for ( ; buckstart < buckend; buckstart++ )
        set_empty(buckstart);
  }

 public:
  void set_empty_key(const key_type &key) {
    // Once you set the empty key, you can't change it
    assert(!use_empty);
    use_empty = true;
    set_key(&emptykey, key);
    empty_is_zero = true;           // true if key is all 0's
    for ( size_t i = 0; i < sizeof(emptykey); ++i )
      if ( (reinterpret_cast<const char *>(&emptykey))[i] != 0 ) {
        empty_is_zero = false;      // won't be able to optimize :-(
        break;
      }
    empty_is_zero = false;          // always disabled: the empty-is-zero
                                    // heuristic seems to hurt

    assert(!table);                 // must set before first use
    // num_buckets was set in constructor even though table was NULL
    table = static_cast<value_type *>(malloc(num_buckets * sizeof(*table)));
    assert(table);
    set_empty(0, num_buckets);
  }

  // FUNCTIONS CONCERNING SIZE
 public:
  size_type size() const      { return num_elements - num_deleted; }
  // Buckets are always a power of 2
  size_type max_size() const  { return (size_type(-1) >> 1U) + 1; }
  bool empty() const          { return size() == 0; }
  size_type bucket_count() const      { return num_buckets; }
  size_type max_bucket_count() const  { return max_size(); }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // This is the smallest size a hashtable can be without being too crowded
  // If you like, you can give a min #buckets as well as a min #elts
  size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
    size_type sz = HT_MIN_BUCKETS;             // min buckets allowed
    while ( sz < min_buckets_wanted || num_elts >= sz * HT_OCCUPANCY_FLT )
      sz *= 2;                                 // stay a power of two
    return sz;
  }
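
  // For example, with HT_OCCUPANCY_FLT == 0.5, min_size(100, 0) doubles
  // 32 -> 64 -> 128 -> 256 and returns 256: the smallest power of two
  // whose occupancy threshold (256 * 0.5 == 128) exceeds 100 elements.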

  // Used after a string of deletes
  void maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);

    if ( (num_elements-num_deleted) <= shrink_threshold &&
         bucket_count() > HT_MIN_BUCKETS ) {
      size_type sz = bucket_count() / 2;     // find how much we should shrink
      while ( sz > HT_MIN_BUCKETS &&
              (num_elements - num_deleted) <= sz * HT_EMPTY_FLT )
        sz /= 2;                             // stay a power of 2
      dense_hashtable tmp(*this, sz);        // Do the actual resizing
      swap(tmp);                             // now we are tmp
    }
    consider_shrink = false;                 // because we just considered it
  }
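
  // A worked example, assuming HT_EMPTY_FLT == 0.2: a 1024-bucket table
  // has shrink_threshold == 204, so 40 live elements triggers a shrink.
  // sz then halves 512 -> 256 -> 128 and stops (40 > 128 * 0.2 == 25.6),
  // leaving a 128-bucket table at a healthy ~31% occupancy.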

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  void resize_delta(size_type delta, size_type min_buckets_wanted = 0) {
    if ( consider_shrink )                 // see if lots of deletes happened
      maybe_shrink();
    if ( bucket_count() > min_buckets_wanted &&
         (num_elements + delta) <= enlarge_threshold )
      return;                              // we're ok as we are

    const size_type resize_to = min_size(num_elements + delta,
                                         min_buckets_wanted);
    if ( resize_to > bucket_count() ) {    // we don't have enough buckets
      dense_hashtable tmp(*this, resize_to);
      swap(tmp);                           // now we are tmp
    }
  }

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted = 0) {
    clear();                // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {    // we don't have enough buckets
      table = static_cast<value_type *>(realloc(table, resize_to
                                                * sizeof(*table)));
      assert(table != NULL);
      set_empty(num_buckets, resize_to);   // empty everything between them
      num_buckets = resize_to;
      reset_thresholds();
    }

    // We use a normal iterator to get non-deleted buckets from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count()-1)) == 0);  // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;            // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      for ( bucknum = hash(get_key(*it)) & bucket_count_minus_one;
            !test_empty(bucknum);          // not empty
            bucknum = (bucknum + JUMP_(get_key(*it), num_probes))
                          & bucket_count_minus_one ) {
        ++num_probes;
        assert(num_probes < bucket_count());  // or else the hashtable is full
      }
      set_value(&table[bucknum], *it);     // copies the value to here
      num_elements++;
    }
  }

 public:
  // Required by the spec for hashed associative container
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as req_elements.  As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {    // resize to this or larger
    if ( consider_shrink || req_elements == 0 )
      maybe_shrink();
    if ( req_elements > num_elements )
      return resize_delta(req_elements - num_elements, 0);
  }

  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor.  We also define a copy constructor and =.
  // DESTRUCTOR -- needs to free the table
  explicit dense_hashtable(size_type n = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey())
    : hash(hf), equals(eql), get_key(ext), num_deleted(0),
      use_deleted(false), use_empty(false),
      empty_is_zero(false), delkey(), emptykey(),
      table(NULL), num_buckets(min_size(0, n)), num_elements(0) {
    // table is NULL until emptykey is set.  However, we set num_buckets
    // here so we know how much space to allocate once emptykey is set
    reset_thresholds();
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht
  dense_hashtable(const dense_hashtable& ht, size_type min_buckets_wanted = 0)
    : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_deleted(0),
      use_deleted(ht.use_deleted), use_empty(ht.use_empty),
      empty_is_zero(ht.empty_is_zero), delkey(ht.delkey),
      emptykey(ht.emptykey),
      table(NULL), num_buckets(min_size(0, min_buckets_wanted)),
      num_elements(0) {
    reset_thresholds();
    copy_from(ht, min_buckets_wanted);  // copy_from() ignores deleted entries
  }

  dense_hashtable& operator= (const dense_hashtable& ht) {
    if (&ht == this)  return *this;     // don't copy onto ourselves
    hash = ht.hash;
    equals = ht.equals;
    get_key = ht.get_key;
    use_deleted = ht.use_deleted;
    use_empty = ht.use_empty;
    empty_is_zero = ht.empty_is_zero;
    set_key(&delkey, ht.delkey);
    set_key(&emptykey, ht.emptykey);
    copy_from(ht);                      // sets num_deleted to 0 too
    return *this;
  }

  ~dense_hashtable() {
    if (table)  free(table);
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(dense_hashtable& ht) {
    STL_NAMESPACE::swap(hash, ht.hash);
    STL_NAMESPACE::swap(equals, ht.equals);
    STL_NAMESPACE::swap(get_key, ht.get_key);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
    STL_NAMESPACE::swap(use_empty, ht.use_empty);
    STL_NAMESPACE::swap(empty_is_zero, ht.empty_is_zero);
    {  key_type tmp;            // for annoying reasons, swap() doesn't work
       set_key(&tmp, delkey);
       set_key(&delkey, ht.delkey);
       set_key(&ht.delkey, tmp);
    }
    {  key_type tmp;            // for annoying reasons, swap() doesn't work
       set_key(&tmp, emptykey);
       set_key(&emptykey, ht.emptykey);
       set_key(&ht.emptykey, tmp);
    }
    STL_NAMESPACE::swap(table, ht.table);
    STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
    STL_NAMESPACE::swap(num_elements, ht.num_elements);
    reset_thresholds();
    ht.reset_thresholds();
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    num_buckets = min_size(0, 0);       // our new size
    reset_thresholds();
    table = static_cast<value_type *>(realloc(table, num_buckets
                                              * sizeof(*table)));
    assert(table);
    set_empty(0, num_buckets);
    num_elements = 0;
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type &key) const {
    size_type num_probes = 0;               // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET;  // where we would insert
    while ( 1 ) {                           // probe until something happens
      if ( test_empty(bucknum) ) {          // bucket is empty
        if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
      } else if ( test_deleted(bucknum) ) { // keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;
      } else if ( equals(key, get_key(table[bucknum])) ) {
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                         // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count());  // don't probe too many times!
    }
  }

 public:
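  // For example, suppose probing for key K visits bucket 5 (deleted),
  // then bucket 9 (occupied by another key), then bucket 2 (empty):
  //   - if K is never found:  returns pair(ILLEGAL_BUCKET, 5), since the
  //     first deleted slot seen is preferred over the later empty one
  //   - if K is found at bucket 9 instead: returns pair(9, ILLEGAL_BUCKET)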

  iterator find(const key_type& key) {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )       // alas, not there
      return end();
    else
      return iterator(this, table + pos.first, table + num_buckets, false);
  }

  const_iterator find(const key_type& key) const {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )       // alas, not there
      return end();
    else
      return const_iterator(this, table + pos.first,
                            table + num_buckets, false);
  }
652 size_type
count(const key_type
&key
) const {
653 pair
<size_type
, size_type
> pos
= find_position(key
);
654 return pos
.first
== ILLEGAL_BUCKET
? 0 : 1;

  // Likewise, equal_range doesn't really make sense for us.  Oh well.
  pair<iterator,iterator> equal_range(const key_type& key) {
    const iterator pos = find(key);          // either an iterator or end
    return pair<iterator,iterator>(pos, pos);
  }
  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
    const const_iterator pos = find(key);    // either an iterator or end
    return pair<const_iterator,const_iterator>(pos, pos);
  }

  // INSERTION ROUTINES

  // If you know *this is big enough to hold obj, use this routine
  pair<iterator, bool> insert_noresize(const value_type& obj) {
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET ) {     // object was already there
      return pair<iterator,bool>(iterator(this, table + pos.first,
                                          table + num_buckets, false),
                                 false);     // false: we didn't insert
    } else {                                 // pos.second says where to put it
      if ( test_deleted(pos.second) ) {      // just replace if it's been del.
        const_iterator delpos(this, table + pos.second,    // shrug:
                              table + num_buckets, false); // shouldn't need const
        clear_deleted(delpos);
        assert( num_deleted > 0 );
        --num_deleted;                       // used to be, now it isn't
      } else {
        ++num_elements;                      // replacing an empty bucket
      }
      set_value(&table[pos.second], obj);
      return pair<iterator,bool>(iterator(this, table + pos.second,
                                          table + num_buckets, false),
                                 true);      // true: we did insert
    }
  }

  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const value_type& obj) {
    resize_delta(1);                      // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type
    insert(f, l,
           typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_type n = STL_NAMESPACE::distance(f, l);  // TODO(csilvers): standard?
    resize_delta(n);
    for ( ; n > 0; --n, ++f )
      insert_noresize(*f);
  }

  // Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f )
      insert(*f);
  }
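
  // For example (a sketch): inserting a std::vector's worth of values
  // dispatches to the forward_iterator_tag overload above, so the table
  // is resized once up front instead of potentially once per element:
  //    std::vector<value_type> v = ...;
  //    ht.insert(v.begin(), v.end());    // one resize_delta(v.size())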

  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    const_iterator pos = find(key);   // shrug: shouldn't need to be const
    if ( pos != end() ) {
      assert(!test_deleted(pos));     // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      consider_shrink = true;    // will think about shrink after next insert
      return 1;                  // because we deleted one thing
    } else {
      return 0;                  // because we deleted nothing
    }
  }

  // This is really evil: really it should be iterator, not const_iterator.
  // But...the only reason keys are const is to allow lookup.
  // Since that's a moot issue for deleted keys, we allow const_iterators
  void erase(const_iterator pos) {
    if ( pos == end() ) return;  // sanity check
    if ( set_deleted(pos) ) {    // true if object has been newly deleted
      ++num_deleted;
      consider_shrink = true;    // will think about shrink after next insert
    }
  }

  void erase(const_iterator f, const_iterator l) {
    for ( ; f != l; ++f ) {
      if ( set_deleted(f) )      // should always be true
        ++num_deleted;
    }
    consider_shrink = true;      // will think about shrink after next insert
  }

  // COMPARISON
  bool operator==(const dense_hashtable& ht) const {
    // We really want to check that the hash functions are the same
    // but alas there's no way to do this.  We just hope.
    return ( num_deleted == ht.num_deleted && table == ht.table );
  }
  bool operator!=(const dense_hashtable& ht) const {
    return !(*this == ht);
  }

  // I/O ROUTINES
  // We support reading and writing hashtables to disk.  Alas, since
  // I don't know how to write a hasher or key_equal, you have to make
  // sure everything but the table is the same.  We compact before writing.
  //
  // NOTE: These functions are currently TODO.  They have not been implemented.
  bool write_metadata(FILE *fp) {
    squash_deleted();            // so we don't have to worry about delkey
    return false;                // TODO
  }

  bool read_metadata(FILE *fp) {
    num_deleted = 0;             // since we got rid before writing
    assert(use_empty);           // have to set this before calling us
    if (table)  free(table);     // we'll make our own
    // TODO: read magic number
    // TODO: read num_buckets
    reset_thresholds();
    table = static_cast<value_type *>(malloc(num_buckets * sizeof(*table)));
    assert(table);
    set_empty(0, num_buckets);
    // TODO: read num_elements
    for ( size_type i = 0; i < num_elements; ++i ) {
      // TODO: read bucket_num
      // TODO: set with non-empty, non-deleted value
    }
    return false;                // TODO
  }

  // If your keys and values are simple enough, we can write them
  // to disk for you.  "Simple enough" means no pointers.
  // However, we don't try to normalize endianness.
  bool write_nopointer_data(FILE *fp) const {
    for ( const_iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return false;                // TODO
  }

  // When reading, we have to override the potential const-ness of *it
  bool read_nopointer_data(FILE *fp) {
    for ( iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return false;                // TODO
  }

 private:
  // We need to enforce that our value_type is a Plain Old Data Type
  // (so we know realloc and memmove will work).  We use traits to
  // enforce this.  The following gives a compile-time error if
  // is_POD_type is false (which is the default for user types).
  //
  // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
  // and if so tell the compiler via code similar to this:
  //   template<> struct __type_traits<classname> {
  //     typedef __true_type has_trivial_default_constructor;
  //     typedef __true_type has_trivial_copy_constructor;
  //     typedef __true_type has_trivial_assignment_operator;
  //     typedef __true_type has_trivial_destructor;
  //     typedef __true_type is_POD_type;
  //   };
  //
  // If this is part of a hash_map, you need to make sure both the
  // Key and Value types are POD types, if they're user-defined.
#ifdef UNDERSTANDS_TYPE_TRAITS
  static __true_type* const enforce_pod;
#endif

  // The actual data
  hasher hash;                  // required by hashed_associative_container
  key_equal equals;
  ExtractKey get_key;
  size_type num_deleted;     // how many occupied buckets are marked deleted
  bool use_deleted;          // false until delkey has been set
  bool use_empty;            // you must do this before you start
  bool empty_is_zero;        // can optimize this case when emptying
  key_type delkey;           // which key marks deleted entries
  key_type emptykey;         // which key marks unused entries
  value_type *table;
  size_type num_buckets;
  size_type num_elements;
  size_type shrink_threshold;   // num_buckets * HT_EMPTY_FLT
  size_type enlarge_threshold;  // num_buckets * HT_OCCUPANCY_FLT
  bool consider_shrink;  // true if we should try to shrink before next insert

  void reset_thresholds() {
    enlarge_threshold = static_cast<size_type>(num_buckets*HT_OCCUPANCY_FLT);
    shrink_threshold = static_cast<size_type>(num_buckets*HT_EMPTY_FLT);
    consider_shrink = false;  // whatever caused us to reset already considered
  }
};
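
// As a worked example of reset_thresholds(): a 32-bucket table with
// HT_OCCUPANCY_FLT == 0.5 and HT_EMPTY_FLT == 0.2 gets enlarge_threshold
// == 16 and shrink_threshold == 6 (32 * 0.2 == 6.4, truncated by the
// cast), so the table grows when the 17th live element is inserted.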

// We need a global swap as well
template <class V, class K, class HF, class ExK, class EqK, class A>
inline void swap(dense_hashtable<V,K,HF,ExK,EqK,A> &x,
                 dense_hashtable<V,K,HF,ExK,EqK,A> &y) {
  x.swap(y);
}

#ifdef UNDERSTANDS_TYPE_TRAITS
template <class V, class K, class HF, class ExK, class EqK, class A>
__true_type* const dense_hashtable<V,K,HF,ExK,EqK,A>::enforce_pod =
    static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
#endif

template <class V, class K, class HF, class ExK, class EqK, class A>
const typename dense_hashtable<V,K,HF,ExK,EqK,A>::size_type
dense_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;

// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though it saves memory.
// (This version uses a more conservative 0.5.)
template <class V, class K, class HF, class ExK, class EqK, class A>
const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.5f;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
template <class V, class K, class HF, class ExK, class EqK, class A>
const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT = 0.4f *
    dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;

_END_GOOGLE_NAMESPACE_

#endif /* _DENSEHASHTABLE_H_ */