1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // Author: Craig Silverstein
33 // A sparsetable is a random container that implements a sparse array,
34 // that is, an array that uses very little memory to store unassigned
35 // indices (in this case, between 1-2 bits per unassigned index). For
36 // instance, if you allocate an array of size 5 and assign a[2] = <big
37 // struct>, then a[2] will take up a lot of memory but a[0], a[1],
38 // a[3], and a[4] will not. Array elements that have a value are
39 // called "assigned". Array elements that have no value yet, or have
40 // had their value cleared using erase() or clear(), are called
43 // Unassigned values seem to have the default value of T (see below).
44 // Nevertheless, there is a difference between an unassigned index and
45 // one explicitly assigned the value of T(). The latter is considered
48 // Access to an array element is constant time, as is insertion and
49 // deletion. Insertion and deletion may be fairly slow, however:
50 // because of this container's memory economy, each insert and delete
51 // causes a memory reallocation.
53 // See /usr/(local/)?doc/sparsehash-0.1/sparsetable.html
54 // for information about how to use this class.
56 #ifndef _SPARSETABLE_H_
57 #define _SPARSETABLE_H_
59 // !!! DR changed some <google/...> to "..." due to include path problems...
60 #include "sparsehash/google_config.h"
61 #include <stdlib.h> // for malloc/free
62 #include <stdio.h> // to read/write tables
63 #if defined HAVE_STDINT_H
64 #include <stdint.h> // to get uint16_t (ISO naming madness)
65 #elif defined HAVE_INTTYPES_H
66 #include <inttypes.h> // another place uint16_t might be defined
68 #include <sys/types.h> // our last best hope
70 #include <assert.h> // for bounds checking
71 #include <iterator> // to define reverse_iterator<> for me
72 #include <vector> // a sparsetable is a vector of groups
75 #include <string.h> // for memcpy and mmemmove
78 #define memcpy(d, s, n) bcopy ((s), (d), (n))
79 #define memmove(d, s, n) bcopy ((s), (d), (n))
83 #if !defined HAVE_U_INT16_T && defined HAVE___UINT16
84 typedef __uint16 u_int16_t // true on vc++7
87 _START_GOOGLE_NAMESPACE_
89 using STL_NAMESPACE::vector;
91 // The smaller this is, the faster lookup is (because the group bitmap is
92 // smaller) and the faster insert is, because there's less to memmove.
93 // On the other hand, there are more groups. Since group::size_type is
94 // a short, this number should be of the form 32*x + 16 to avoid waste.
95 static const uint16_t DEFAULT_SPARSEGROUP_SIZE = 48; // fits in 1.5 words
98 // A NOTE ON ASSIGNING:
99 // A sparse table does not actually allocate memory for entries
100 // that are not filled. Because of this, it becomes complicated
101 // to have a non-const iterator: we don't know, if the iterator points
102 // to a not-filled bucket, whether you plan to fill it with something
103 // or whether you plan to read its value (in which case you'll get
104 // the default bucket value). Therefore, while we can define const
105 // operations in a pretty 'normal' way, for non-const operations, we
106 // define something that returns a helper object with operator= and
107 // operator& that allocate a bucket lazily. We use this for table[]
108 // and also for regular table iterators.
110 template <class tabletype>
111 class table_element_adaptor {
113 typedef typename tabletype::value_type value_type;
114 typedef typename tabletype::size_type size_type;
115 typedef typename tabletype::reference reference;
116 typedef typename tabletype::pointer pointer;
118 table_element_adaptor(tabletype *tbl, size_type p)
119 : table(tbl), pos(p) { }
120 table_element_adaptor& operator= (value_type val) {
121 table->set(pos, val);
124 operator value_type() { return table->get(pos); } // we look like a value
125 pointer operator& () { return &table->mutating_get(pos); }
132 // Our iterator as simple as iterators can be: basically it's just
133 // the index into our table. Dereference, the only complicated
134 // thing, we punt to the table class. This just goes to show how
135 // much machinery STL requires to do even the most trivial tasks.
137 // By templatizing over tabletype, we have one iterator type which
138 // we can use for both sparsetables and sparsebins. In fact it
139 // works on any class that allows size() and operator[] (eg vector),
140 // as long as it does the standard STL typedefs too (eg value_type).
142 template <class tabletype>
143 class table_iterator {
145 typedef table_iterator iterator;
147 #ifdef UNDERSTANDS_ITERATOR_TAGS
148 typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
150 typedef typename tabletype::value_type value_type;
151 typedef typename tabletype::difference_type difference_type;
152 typedef typename tabletype::size_type size_type;
153 typedef table_element_adaptor<tabletype> reference;
154 typedef table_element_adaptor<tabletype>* pointer;
156 // The "real" constructor
157 table_iterator(tabletype *tbl, size_type p)
158 : table(tbl), pos(p) { }
159 // The default constructor, used when I define vars of type table::iterator
160 table_iterator() : table(NULL), pos(0) { }
161 // The copy constructor, for when I say table::iterator foo = tbl.begin()
162 // The default destructor is fine; we don't define one
163 // The default operator= is fine; we don't define one
165 // The main thing our iterator does is dereference. If the table entry
166 // we point to is empty, we return the default value type.
167 // This is the big different function from the const iterator.
168 reference operator*() {
169 return table_element_adaptor<tabletype>(table, pos);
171 pointer operator->() { return &(operator*()); }
173 // Helper function to assert things are ok; eg pos is still in range
176 assert(pos <= table->size());
179 // Arithmetic: we just do arithmetic on pos. We don't even need to
180 // do bounds checking, since STL doesn't consider that it's job. :-)
181 iterator& operator+=(size_type t) { pos += t; check(); return *this; }
182 iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
183 iterator& operator++() { ++pos; check(); return *this; }
184 iterator& operator--() { --pos; check(); return *this; }
185 iterator operator++(int) { iterator tmp(*this); // for x++
186 ++pos; check(); return tmp; }
187 iterator operator--(int) { iterator tmp(*this); // for x--
188 --pos; check(); return tmp; }
189 iterator operator+(difference_type i) const { iterator tmp(*this);
190 tmp += i; return tmp; }
191 iterator operator-(difference_type i) const { iterator tmp(*this);
192 tmp -= i; return tmp; }
193 difference_type operator-(iterator it) const { // for "x = it2 - it"
194 assert(table == it.table);
197 reference operator[](difference_type n) const {
198 return *(*this + n); // simple though not totally efficient
202 bool operator==(const iterator& it) const {
203 return table == it.table && pos == it.pos;
205 bool operator<(const iterator& it) const {
206 assert(table == it.table); // life is bad bad bad otherwise
209 bool operator<=(const iterator& it) const {
210 assert(table == it.table);
211 return pos <= it.pos;
213 bool operator!=(const iterator& it) const { return !(*this == it); }
214 bool operator>(const iterator& it) const { return it <= *this; }
215 bool operator>=(const iterator& it) const { return it < *this; }
217 // Here's the info we actually need to be an iterator
218 tabletype *table; // so we can dereference and bounds-check
219 size_type pos; // index into the table
222 // support for "3 + iterator" has to be defined outside the class, alas
224 table_iterator<T> operator+(typename table_iterator<T>::difference_type i,
225 table_iterator<T> it) {
226 return it + i; // so people can say it2 = 3 + it
229 template <class tabletype>
230 class const_table_iterator {
232 typedef table_iterator<tabletype> iterator;
233 typedef const_table_iterator const_iterator;
235 #ifdef UNDERSTANDS_ITERATOR_TAGS
236 typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
238 typedef typename tabletype::value_type value_type;
239 typedef typename tabletype::difference_type difference_type;
240 typedef typename tabletype::size_type size_type;
241 typedef typename tabletype::const_reference reference; // we're const-only
242 typedef typename tabletype::const_pointer pointer;
244 // The "real" constructor
245 const_table_iterator(const tabletype *tbl, size_type p)
246 : table(tbl), pos(p) { }
247 // The default constructor, used when I define vars of type table::iterator
248 const_table_iterator() : table(NULL), pos(0) { }
249 // The copy constructor, for when I say table::iterator foo = tbl.begin()
250 // Also converts normal iterators to const iterators
251 const_table_iterator(const iterator &from)
252 : table(from.table), pos(from.pos) { }
253 // The default destructor is fine; we don't define one
254 // The default operator= is fine; we don't define one
256 // The main thing our iterator does is dereference. If the table entry
257 // we point to is empty, we return the default value type.
258 reference operator*() const { return (*table)[pos]; }
259 pointer operator->() const { return &(operator*()); }
261 // Helper function to assert things are ok; eg pos is still in range
264 assert(pos <= table->size());
267 // Arithmetic: we just do arithmetic on pos. We don't even need to
268 // do bounds checking, since STL doesn't consider that it's job. :-)
269 const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
270 const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
271 const_iterator& operator++() { ++pos; check(); return *this; }
272 const_iterator& operator--() { --pos; check(); return *this; }
273 const_iterator operator++(int) { const_iterator tmp(*this); // for x++
274 ++pos; check(); return tmp; }
275 const_iterator operator--(int) { const_iterator tmp(*this); // for x--
276 --pos; check(); return tmp; }
277 const_iterator operator+(difference_type i) const { const_iterator tmp(*this);
278 tmp += i; return tmp; }
279 const_iterator operator-(difference_type i) const { const_iterator tmp(*this);
280 tmp -= i; return tmp; }
281 difference_type operator-(const_iterator it) const { // for "x = it2 - it"
282 assert(table == it.table);
285 reference operator[](difference_type n) const {
286 return *(*this + n); // simple though not totally efficient
290 bool operator==(const const_iterator& it) const {
291 return table == it.table && pos == it.pos;
293 bool operator<(const const_iterator& it) const {
294 assert(table == it.table); // life is bad bad bad otherwise
297 bool operator<=(const const_iterator& it) const {
298 assert(table == it.table);
299 return pos <= it.pos;
301 bool operator!=(const const_iterator& it) const { return !(*this == it); }
302 bool operator>(const const_iterator& it) const { return it <= *this; }
303 bool operator>=(const const_iterator& it) const { return it < *this; }
305 // Here's the info we actually need to be an iterator
306 const tabletype *table; // so we can dereference and bounds-check
307 size_type pos; // index into the table
310 // support for "3 + iterator" has to be defined outside the class, alas
312 const_table_iterator<T> operator+(typename
313 const_table_iterator<T>::difference_type i,
314 const_table_iterator<T> it) {
315 return it + i; // so people can say it2 = 3 + it
319 // ---------------------------------------------------------------------------
323 // This is a 2-D iterator. You specify a begin and end over a list
324 // of *containers*. We iterate over each container by iterating over
325 // it. It's actually simple:
326 // VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---\
327 // | ________________________________________________/
328 // | \_> VECTOR[1].begin() --------> VECTOR[1].end() -\
329 // | ___________________________________________________/
333 // It's impossible to do random access on one of these things in constant
334 // time, so it's just a bidirectional iterator.
336 // Unfortunately, because we need to use this for a non-empty iterator,
337 // we use nonempty_begin() and nonempty_end() instead of begin() and end()
338 // (though only going across, not down).
341 #define TWOD_BEGIN_ nonempty_begin
342 #define TWOD_END_ nonempty_end
343 #define TWOD_ITER_ nonempty_iterator
344 #define TWOD_CONST_ITER_ const_nonempty_iterator
346 template <class containertype>
347 class two_d_iterator {
349 typedef two_d_iterator iterator;
351 #ifdef UNDERSTANDS_ITERATOR_TAGS
352 typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
354 // apparently some versions of VC++ have trouble with two ::'s in a typename
355 typedef typename containertype::value_type _tmp_vt;
356 typedef typename _tmp_vt::value_type value_type;
357 typedef typename _tmp_vt::difference_type difference_type;
358 typedef typename _tmp_vt::reference reference;
359 typedef typename _tmp_vt::pointer pointer;
361 // The "real" constructor. begin and end specify how many rows we have
362 // (in the diagram above); we always iterate over each row completely.
363 two_d_iterator(typename containertype::iterator begin,
364 typename containertype::iterator end,
365 typename containertype::iterator curr)
366 : row_begin(begin), row_end(end), row_current(curr), col_current() {
367 if ( row_current != row_end ) {
368 col_current = row_current->TWOD_BEGIN_();
369 advance_past_end(); // in case cur->begin() == cur->end()
372 // If you want to start at an arbitrary place, you can, I guess
373 two_d_iterator(typename containertype::iterator begin,
374 typename containertype::iterator end,
375 typename containertype::iterator curr,
376 typename containertype::value_type::TWOD_ITER_ col)
377 : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
378 advance_past_end(); // in case cur->begin() == cur->end()
380 // The default constructor, used when I define vars of type table::iterator
381 two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { }
382 // The default destructor is fine; we don't define one
383 // The default operator= is fine; we don't define one
385 // Happy dereferencer
386 reference operator*() const { return *col_current; }
387 pointer operator->() const { return &(operator*()); }
389 // Arithmetic: we just do arithmetic on pos. We don't even need to
390 // do bounds checking, since STL doesn't consider that it's job. :-)
391 // NOTE: this is not amortized constant time! What do we do about it?
392 void advance_past_end() { // used when col_current points to end()
393 while ( col_current == row_current->TWOD_END_() ) { // end of current row
394 ++row_current; // go to beginning of next
395 if ( row_current != row_end ) // col is irrelevant at end
396 col_current = row_current->TWOD_BEGIN_();
398 break; // don't go past row_end
402 iterator& operator++() {
403 assert(row_current != row_end); // how to ++ from there?
405 advance_past_end(); // in case col_current is at end()
408 iterator& operator--() {
409 while ( row_current == row_end ||
410 col_current == row_current->TWOD_BEGIN_() ) {
411 assert(row_current != row_begin);
413 col_current = row_current->TWOD_END_(); // this is 1 too far
418 iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
419 iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }
423 bool operator==(const iterator& it) const {
424 return ( row_begin == it.row_begin &&
425 row_end == it.row_end &&
426 row_current == it.row_current &&
427 (row_current == row_end || col_current == it.col_current) );
429 bool operator!=(const iterator& it) const { return !(*this == it); }
432 // Here's the info we actually need to be an iterator
433 // These need to be public so we convert from iterator to const_iterator
434 typename containertype::iterator row_begin, row_end, row_current;
435 typename containertype::value_type::TWOD_ITER_ col_current;
438 // The same thing again, but this time const. :-(
439 template <class containertype>
440 class const_two_d_iterator {
442 typedef const_two_d_iterator iterator;
444 #ifdef UNDERSTANDS_ITERATOR_TAGS
445 typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
447 // apparently some versions of VC++ have trouble with two ::'s in a typename
448 typedef typename containertype::value_type _tmp_vt;
449 typedef typename _tmp_vt::value_type value_type;
450 typedef typename _tmp_vt::difference_type difference_type;
451 typedef typename _tmp_vt::const_reference reference;
452 typedef typename _tmp_vt::const_pointer pointer;
454 const_two_d_iterator(typename containertype::const_iterator begin,
455 typename containertype::const_iterator end,
456 typename containertype::const_iterator curr)
457 : row_begin(begin), row_end(end), row_current(curr), col_current() {
459 col_current = curr->TWOD_BEGIN_();
460 advance_past_end(); // in case cur->begin() == cur->end()
463 const_two_d_iterator(typename containertype::const_iterator begin,
464 typename containertype::const_iterator end,
465 typename containertype::const_iterator curr,
466 typename containertype::value_type::TWOD_CONST_ITER_ col)
467 : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
468 advance_past_end(); // in case cur->begin() == cur->end()
470 const_two_d_iterator()
471 : row_begin(), row_end(), row_current(), col_current() {
473 // Need this explicitly so we can convert normal iterators to const iterators
474 const_two_d_iterator(const two_d_iterator<containertype>& it) :
475 row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current),
476 col_current(it.col_current) { }
478 typename containertype::const_iterator row_begin, row_end, row_current;
479 typename containertype::value_type::TWOD_CONST_ITER_ col_current;
482 // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR
483 reference operator*() const { return *col_current; }
484 pointer operator->() const { return &(operator*()); }
486 void advance_past_end() { // used when col_current points to end()
487 while ( col_current == row_current->TWOD_END_() ) { // end of current row
488 ++row_current; // go to beginning of next
489 if ( row_current != row_end ) // col is irrelevant at end
490 col_current = row_current->TWOD_BEGIN_();
492 break; // don't go past row_end
495 iterator& operator++() {
496 assert(row_current != row_end); // how to ++ from there?
498 advance_past_end(); // in case col_current is at end()
501 iterator& operator--() {
502 while ( row_current == row_end ||
503 col_current == row_current->TWOD_BEGIN_() ) {
504 assert(row_current != row_begin);
506 col_current = row_current->TWOD_END_(); // this is 1 too far
511 iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
512 iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }
514 bool operator==(const iterator& it) const {
515 return ( row_begin == it.row_begin &&
516 row_end == it.row_end &&
517 row_current == it.row_current &&
518 (row_current == row_end || col_current == it.col_current) );
520 bool operator!=(const iterator& it) const { return !(*this == it); }
523 // We provide yet another version, to be as frugal with memory as
524 // possible. This one frees each block of memory as it finishes
525 // iterating over it. By the end, the entire table is freed.
526 // For understandable reasons, you can only iterate over it once,
527 // which is why it's an input iterator
528 template <class containertype>
529 class destructive_two_d_iterator {
531 typedef destructive_two_d_iterator iterator;
533 #ifdef UNDERSTANDS_ITERATOR_TAGS
534 typedef STL_NAMESPACE::input_iterator_tag iterator_category;
536 // apparently some versions of VC++ have trouble with two ::'s in a typename
537 typedef typename containertype::value_type _tmp_vt;
538 typedef typename _tmp_vt::value_type value_type;
539 typedef typename _tmp_vt::difference_type difference_type;
540 typedef typename _tmp_vt::reference reference;
541 typedef typename _tmp_vt::pointer pointer;
543 destructive_two_d_iterator(typename containertype::iterator begin,
544 typename containertype::iterator end,
545 typename containertype::iterator curr)
546 : row_begin(begin), row_end(end), row_current(curr), col_current() {
548 col_current = curr->TWOD_BEGIN_();
549 advance_past_end(); // in case cur->begin() == cur->end()
552 destructive_two_d_iterator(typename containertype::iterator begin,
553 typename containertype::iterator end,
554 typename containertype::iterator curr,
555 typename containertype::value_type::TWOD_ITER_ col)
556 : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
557 advance_past_end(); // in case cur->begin() == cur->end()
559 destructive_two_d_iterator()
560 : row_begin(), row_end(), row_current(), col_current() {
563 typename containertype::iterator row_begin, row_end, row_current;
564 typename containertype::value_type::TWOD_ITER_ col_current;
566 // This is the part that destroys
567 void advance_past_end() { // used when col_current points to end()
568 while ( col_current == row_current->TWOD_END_() ) { // end of current row
569 row_current->clear(); // the destructive part
570 // It would be nice if we could decrement sparsetable->num_buckets here
571 ++row_current; // go to beginning of next
572 if ( row_current != row_end ) // col is irrelevant at end
573 col_current = row_current->TWOD_BEGIN_();
575 break; // don't go past row_end
579 // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
580 reference operator*() const { return *col_current; }
581 pointer operator->() const { return &(operator*()); }
583 iterator& operator++() {
584 assert(row_current != row_end); // how to ++ from there?
586 advance_past_end(); // in case col_current is at end()
589 iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
591 bool operator==(const iterator& it) const {
592 return ( row_begin == it.row_begin &&
593 row_end == it.row_end &&
594 row_current == it.row_current &&
595 (row_current == row_end || col_current == it.col_current) );
597 bool operator!=(const iterator& it) const { return !(*this == it); }
603 #undef TWOD_CONST_ITER_
610 // The idea is that a table with (logically) t buckets is divided
611 // into t/M *groups* of M buckets each. (M is a constant set in
612 // GROUP_SIZE for efficiency.) Each group is stored sparsely.
613 // Thus, inserting into the table causes some array to grow, which is
614 // slow but still constant time. Lookup involves doing a
615 // logical-position-to-sparse-position lookup, which is also slow but
616 // constant time. The larger M is, the slower these operations are
617 // but the less overhead (slightly).
619 // To store the sparse array, we store a bitmap B, where B[i] = 1 iff
620 // bucket i is non-empty. Then to look up bucket i we really look up
621 // array[# of 1s before i in B]. This is constant time for fixed M.
623 // Terminology: the position of an item in the overall table (from
624 // 1 .. t) is called its "location." The logical position in a group
625 // (from 1 .. M ) is called its "position." The actual location in
626 // the array (from 1 .. # of non-empty buckets in the group) is
627 // called its "offset."
629 // The weird mod in the offset is entirely to quiet compiler warnings
630 #define PUT_(take_from, offset) \
631 if ( putc(((take_from) >> ((offset) % (sizeof(take_from)*8))) % 256, fp) \
635 #define GET_(add_to, offset) \
636 if ((x=getc(fp)) == EOF) \
639 add_to |= (static_cast<size_type>(x) << ((offset) % (sizeof(add_to)*8)))
642 template <class T, uint16_t GROUP_SIZE>
646 typedef T value_type;
647 typedef value_type* pointer;
648 typedef const value_type* const_pointer;
649 typedef table_iterator<sparsegroup<T, GROUP_SIZE> > iterator;
650 typedef const_table_iterator<sparsegroup<T, GROUP_SIZE> > const_iterator;
651 typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE> > element_adaptor;
652 typedef value_type &reference;
653 typedef const value_type &const_reference;
654 typedef uint16_t size_type; // max # of buckets
655 typedef int16_t difference_type;
656 typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
657 typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
659 // These are our special iterators, that go over non-empty buckets in a
660 // group. These aren't const-only because you can change non-empty bcks.
661 typedef pointer nonempty_iterator;
662 typedef const_pointer const_nonempty_iterator;
663 typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
664 typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
666 // Iterator functions
667 iterator begin() { return iterator(this, 0); }
668 const_iterator begin() const { return const_iterator(this, 0); }
669 iterator end() { return iterator(this, size()); }
670 const_iterator end() const { return const_iterator(this, size()); }
671 reverse_iterator rbegin() { return reverse_iterator(end()); }
672 const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
673 reverse_iterator rend() { return reverse_iterator(begin()); }
674 const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
676 // We'll have versions for our special non-empty iterator too
677 nonempty_iterator nonempty_begin() { return group; }
678 const_nonempty_iterator nonempty_begin() const { return group; }
679 nonempty_iterator nonempty_end() { return group + num_buckets; }
680 const_nonempty_iterator nonempty_end() const { return group + num_buckets; }
681 reverse_nonempty_iterator nonempty_rbegin() {
682 return reverse_nonempty_iterator(nonempty_end());
684 const_reverse_nonempty_iterator nonempty_rbegin() const {
685 return const_reverse_nonempty_iterator(nonempty_end());
687 reverse_nonempty_iterator nonempty_rend() {
688 return reverse_nonempty_iterator(nonempty_begin());
690 const_reverse_nonempty_iterator nonempty_rend() const {
691 return const_reverse_nonempty_iterator(nonempty_begin());
695 // This gives us the "default" value to return for an empty bucket.
696 // We just use the default constructor on T, the template type
697 const_reference default_value() const {
698 static value_type defaultval = value_type();
704 // We need to do all this bit manipulation, of course. ick
705 static size_type charbit(size_type i) { return i >> 3; }
706 static size_type modbit(size_type i) { return 1 << (i&7); }
707 bool bmtest(size_type i) const { return bitmap[charbit(i)] & modbit(i); }
708 void bmset(size_type i) { bitmap[charbit(i)] |= modbit(i); }
709 void bmclear(size_type i) { bitmap[charbit(i)] &= ~modbit(i); }
711 // malloc / realloc that dies if allocation fails
712 void* realloc_or_die(void* ptr, size_t num_bytes) {
713 void* retval = realloc(ptr, num_bytes);
714 if (retval == NULL) {
715 fprintf(stderr, "FATAL ERROR: failed to allocate %d bytes for ptr %p",
722 void* malloc_or_die(size_t num_bytes) {
723 return realloc_or_die(NULL, num_bytes);
726 public: // get_iter() in sparsetable needs it
727 // We need a small function that tells us how many set bits there are
728 // in positions 0..i-1 of the bitmap. It uses a big table.
729 // We make it static so templates don't allocate lots of these tables
730 static size_type pos_to_offset(const unsigned char *bm, size_type pos) {
731 // We could make these ints. The tradeoff is size (eg does it overwhelm
732 // the cache?) vs efficiency in referencing sub-word-sized array elements
733 static const char bits_in[256] = { // # of bits set in one char
734 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
735 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
736 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
737 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
738 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
739 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
740 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
741 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
742 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
743 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
744 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
745 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
746 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
747 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
748 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
749 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
751 size_type retval = 0;
753 // [Note: condition pos > 8 is an optimization; convince yourself we
754 // give exactly the same result as if we had pos >= 8 here instead.]
755 for ( ; pos > 8; pos -= 8 ) // bm[0..pos/8-1]
756 retval += bits_in[*bm++]; // chars we want *all* bits in
757 return retval + bits_in[*bm & ((1 << pos)-1)]; // the char that includes pos
760 size_type pos_to_offset(size_type pos) const { // not static but still const
761 return pos_to_offset(bitmap, pos);
766 // Constructors -- default and copy -- and destructor
767 sparsegroup() : group(0), num_buckets(0) { memset(bitmap, 0, sizeof(bitmap)); }
768 sparsegroup(const sparsegroup& x) : group(0), num_buckets(x.num_buckets) {
770 group = (value_type *)malloc_or_die(x.num_buckets * sizeof(*group));
771 memcpy(group, x.group, x.num_buckets * sizeof(*group));
773 memcpy(bitmap, x.bitmap, sizeof(bitmap));
775 ~sparsegroup() { free(group); } // free(NULL) does the right thing
777 // Operator= is just like the copy constructor, I guess
778 sparsegroup &operator=(const sparsegroup& x) {
779 if ( &x == this ) return *this; // x = x
780 if ( x.num_buckets == 0 ) {
784 group = (value_type *)
785 realloc_or_die(group, x.num_buckets * sizeof(*group));
786 memcpy(group, x.group, x.num_buckets * sizeof(*group));
788 memcpy(bitmap, x.bitmap, sizeof(bitmap));
789 num_buckets = x.num_buckets;
793 // Many STL algorithms use swap instead of copy constructors
794 void swap(sparsegroup& x) {
795 STL_NAMESPACE::swap(group, x.group);
796 for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i )
797 STL_NAMESPACE::swap(bitmap[i], x.bitmap[i]); // swap not defined on arrays
798 STL_NAMESPACE::swap(num_buckets, x.num_buckets);
801 // It's always nice to be able to clear a table without deallocating it
806 memset(bitmap, 0, sizeof(bitmap));
810 // Functions that tell you about size. Alas, these aren't so useful
811 // because our table is always fixed size.
812 size_type size() const { return GROUP_SIZE; }
813 size_type max_size() const { return GROUP_SIZE; }
814 bool empty() const { return false; }
815 // We also may want to know how many *used* buckets there are
816 size_type num_nonempty() const { return num_buckets; }
819 // get()/set() are explicitly const/non-const. You can use [] if
820 // you want something that can be either (potentially more expensive).
821 const_reference get(size_type i) const {
822 if ( bmtest(i) ) // bucket i is occupied
823 return group[pos_to_offset(bitmap, i)];
825 return default_value(); // return the default reference
828 // TODO(csilvers): make protected + friend
829 reference mutating_get(size_type i) { // fills bucket i before getting
831 set(i, default_value());
832 return group[pos_to_offset(bitmap, i)];
835 // Syntactic sugar. It's easy to return a const reference. To
836 // return a non-const reference, we need to use the assigner adaptor.
837 const_reference operator[](size_type i) const {
841 element_adaptor operator[](size_type i) {
842 return element_adaptor(this, i);
845 // This returns a reference to the inserted item (which is a copy of val)
846 reference set(size_type i, const_reference val) {
847 size_type offset = pos_to_offset(bitmap, i); // where we'll find (or insert)
848 if ( !bmtest(i) ) { // make space at group[offset]
849 // We know the realloc and memmove are safe because we ensure
850 // value_type is a Plain Old Data Type (see below)
851 group = (value_type *)
852 realloc_or_die(group, sizeof(*group) * ++num_buckets);
853 memmove(group + offset+1, group + offset,
854 (num_buckets-1 - offset) * sizeof(*group));
857 // This does the actual inserting. Since we made the array using
858 // malloc, we use "placement new" to just call the constructor.
859 // We really should destruct this later, via group[offset].~value_type(),
860 // every time we free(), but we don't because it's a POD type.
861 new(&group[offset]) value_type(val);
862 return group[offset];
865 // We let you see if a bucket is non-empty without retrieving it
866 bool test(size_type i) const {
869 bool test(iterator pos) const {
870 return bmtest(pos.pos);
873 // This takes the specified elements out of the group. This is
874 // "undefining", rather than "clearing".
875 void erase(size_type i) {
876 if ( bmtest(i) ) { // trivial to erase empty bucket
877 size_type offset = pos_to_offset(bitmap,i); // where we'll find (or insert)
878 if ( --num_buckets == 0 ) {
882 memmove(group + offset, group + offset+1,
883 (num_buckets - offset) * sizeof(*group));
884 group = (value_type *)
885 realloc_or_die(group, sizeof(*group) * num_buckets);
891 void erase(iterator pos) {
895 void erase(iterator start, iterator end) {
896 // This could be more efficient, but to do so we'd need to make
897 // bmclear() clear a range of indices. Doesn't seem worth it.
898 for ( ; start != end; ++start )
904 // We support reading and writing groups to disk. We don't store
905 // the actual array contents (which we don't know how to store),
906 // just the bitmap and size. Meant to be used with table I/O.
907 // Returns true if all was ok
908 bool write_metadata(FILE *fp) const {
909 assert(sizeof(num_buckets) == 2); // we explicitly set to u_int16_t
910 PUT_(num_buckets, 8);
911 PUT_(num_buckets, 0);
912 if ( !fwrite(bitmap, sizeof(bitmap), 1, fp) ) return false;
916 // Reading destroys the old group contents! Returns true if all was ok
917 bool read_metadata(FILE *fp) {
920 int x; // the GET_ macro requires an 'int x' to be defined
921 GET_(num_buckets, 8);
922 GET_(num_buckets, 0);
924 if ( !fread(bitmap, sizeof(bitmap), 1, fp) ) return false;
926 // We'll allocate the space; you'd better fill it somehow!
927 group = (value_type *)malloc(num_buckets * sizeof(*group));
928 if ( group == NULL ) return false;
932 // If your keys and values are simple enough, we can write them
933 // to disk for you. "simple enough" means no pointers.
934 // However, we don't try to normalize endianness
935 bool write_nopointer_data(FILE *fp) const {
936 for ( const_nonempty_iterator it = nonempty_begin();
937 it != nonempty_end(); ++it ) {
938 if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false;
943 // When reading, we have to override the potential const-ness of *it
944 bool read_nopointer_data(FILE *fp) {
945 for ( nonempty_iterator it = nonempty_begin();
946 it != nonempty_end(); ++it ) {
947 if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
953 // Comparisons. Note the comparisons are pretty arbitrary: we
954 // compare values of the first index that isn't equal (using default
955 // value for empty buckets).
956 bool operator==(const sparsegroup& x) const {
957 return ( num_buckets == x.num_buckets &&
958 memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
959 STL_NAMESPACE::equal(begin(), end(), x.begin()) ); // from algorithm
961 bool operator<(const sparsegroup& x) const { // also from algorithm
962 return STL_NAMESPACE::lexicographical_compare(begin(), end(),
965 bool operator!=(const sparsegroup& x) const { return !(*this == x); }
966 bool operator<=(const sparsegroup& x) const { return *this < x || *this == x; }
967 bool operator>(const sparsegroup& x) const { return x <= *this; }
968 bool operator>=(const sparsegroup& x) const { return x < *this; }
971 // We need to enforce that our value_type is a Plain Old Data Type
972 // (so we know realloc and memmove will work). We use traits to
973 // enforce this. The following gives a compile-time error if
974 // is_POD_type is false (which is the default for user types).
976 // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
977 // and if so tell the compiler via code similar to this:
978 // template<> struct __type_traits<classname> {
979 // typedef __true_type has_trivial_default_constructor;
980 // typedef __true_type has_trivial_copy_constructor;
981 // typedef __true_type has_trivial_assignment_operator;
982 // typedef __true_type has_trivial_destructor;
983 // typedef __true_type is_POD_type;
986 // If this is part of a hash_map, you need to make sure both the
987 // Key and Value types are POD types, if they're user-defined.
988 #ifdef UNDERSTANDS_TYPE_TRAITS
989 static __true_type * const enforce_pod;
993 value_type *group; // (small) array of T's
994 unsigned char bitmap[(GROUP_SIZE-1)/8 + 1]; // fancy math is so we round up
995 size_type num_buckets; // limits GROUP_SIZE to 64K
998 // We need a global swap as well
999 template <class T, uint16_t GROUP_SIZE>
1000 inline void swap(sparsegroup<T,GROUP_SIZE> &x, sparsegroup<T,GROUP_SIZE> &y) {
1004 #ifdef UNDERSTANDS_TYPE_TRAITS
1005 template <class T, uint16_t GROUP_SIZE>
1006 __true_type * const sparsegroup<T, GROUP_SIZE>::enforce_pod =
1007 static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
1010 // ---------------------------------------------------------------------------
1013 template <class T, uint16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE>
1017 typedef T value_type; // stolen from stl_vector.h
1018 typedef value_type* pointer;
1019 typedef const value_type* const_pointer;
1020 typedef table_iterator<sparsetable<T, GROUP_SIZE> > iterator;
1021 typedef const_table_iterator<sparsetable<T, GROUP_SIZE> > const_iterator;
1022 typedef table_element_adaptor<sparsetable<T, GROUP_SIZE> > element_adaptor;
1023 typedef value_type &reference;
1024 typedef const value_type &const_reference;
1025 typedef size_t size_type;
1026 typedef ptrdiff_t difference_type;
1027 typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
1028 typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
1030 // These are our special iterators, that go over non-empty buckets in a
1031 // table. These aren't const only because you can change non-empty bcks.
1032 typedef two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
1034 typedef const_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
1035 const_nonempty_iterator;
1036 typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
1037 typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
1038 // Another special iterator: it frees memory as it iterates (used to resize)
1039 typedef destructive_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
1040 destructive_iterator;
1042 // Iterator functions
1043 iterator begin() { return iterator(this, 0); }
1044 const_iterator begin() const { return const_iterator(this, 0); }
1045 iterator end() { return iterator(this, size()); }
1046 const_iterator end() const { return const_iterator(this, size()); }
1047 reverse_iterator rbegin() { return reverse_iterator(end()); }
1048 const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
1049 reverse_iterator rend() { return reverse_iterator(begin()); }
1050 const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
1052 // Versions for our special non-empty iterator
1053 nonempty_iterator nonempty_begin() {
1054 return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
1056 const_nonempty_iterator nonempty_begin() const {
1057 return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin());
1059 nonempty_iterator nonempty_end() {
1060 return nonempty_iterator(groups.begin(), groups.end(), groups.end());
1062 const_nonempty_iterator nonempty_end() const {
1063 return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
1065 reverse_nonempty_iterator nonempty_rbegin() {
1066 return reverse_nonempty_iterator(nonempty_end());
1068 const_reverse_nonempty_iterator nonempty_rbegin() const {
1069 return const_reverse_nonempty_iterator(nonempty_end());
1071 reverse_nonempty_iterator nonempty_rend() {
1072 return reverse_nonempty_iterator(nonempty_begin());
1074 const_reverse_nonempty_iterator nonempty_rend() const {
1075 return const_reverse_nonempty_iterator(nonempty_begin());
1077 destructive_iterator destructive_begin() {
1078 return destructive_iterator(groups.begin(), groups.end(), groups.begin());
1080 destructive_iterator destructive_end() {
1081 return destructive_iterator(groups.begin(), groups.end(), groups.end());
1085 typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::reference
1088 vector< sparsegroup<value_type, GROUP_SIZE> >::const_reference
1089 GroupsConstReference;
1090 typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::iterator
1092 typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::const_iterator
1093 GroupsConstIterator;
1095 // How to deal with the proper group
1096 static size_type num_groups(size_type num) { // how many to hold num buckets
1097 return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1;
1100 size_type pos_in_group(size_type i) const { return i % GROUP_SIZE; }
1101 size_type group_num(size_type i) const { return i / GROUP_SIZE; }
1102 GroupsReference which_group(size_type i) { return groups[group_num(i)]; }
1103 GroupsConstReference which_group(size_type i) const
1104 { return groups[group_num(i)]; }
1108 // Constructors -- default, normal (when you specify size), and copy
1109 sparsetable(size_type size = 0)
1110 : groups(num_groups(size)), table_size(size), num_buckets(0) { }
1111 // We'll can get away with using the default copy constructor,
1112 // and default destructor, and hence the default operator=. Huzzah!
1114 // Many STL algorithms use swap instead of copy constructors
1115 void swap(sparsetable& x) {
1116 STL_NAMESPACE::swap(groups, x.groups);
1117 STL_NAMESPACE::swap(table_size, x.table_size);
1118 STL_NAMESPACE::swap(num_buckets, x.num_buckets);
1121 // It's always nice to be able to clear a table without deallocating it
1123 GroupsIterator group;
1124 for ( group = groups.begin(); group != groups.end(); ++group ) {
1130 // Functions that tell you about size.
1131 // NOTE: empty() is non-intuitive! It does not tell you the number
1132 // of not-empty buckets (use num_nonempty() for that). Instead
1133 // it says whether you've allocated any buckets or not.
1134 size_type size() const { return table_size; }
1135 size_type max_size() const { return size_type(-1); }
1136 bool empty() const { return table_size == 0; }
1137 // We also may want to know how many *used* buckets there are
1138 size_type num_nonempty() const { return num_buckets; }
1140 // OK, we'll let you resize one of these puppies
1141 void resize(size_type new_size) {
1142 groups.resize(num_groups(new_size));
1143 if ( new_size < table_size) { // lower num_buckets, clear last group
1144 if ( new_size % GROUP_SIZE > 0 ) // need to clear from last group
1145 groups.back().erase(groups.back().begin() + (new_size % GROUP_SIZE),
1146 groups.back().end());
1147 num_buckets = 0; // refigure # of used buckets
1148 GroupsConstIterator group;
1149 for ( group = groups.begin(); group != groups.end(); ++group )
1150 num_buckets += group->num_nonempty();
1152 table_size = new_size;
1156 // We let you see if a bucket is non-empty without retrieving it
1157 bool test(size_type i) const {
1158 return which_group(i).test(pos_in_group(i));
1160 bool test(iterator pos) const {
1161 return which_group(pos.pos).test(pos_in_group(pos.pos));
1163 bool test(const_iterator pos) const {
1164 return which_group(pos.pos).test(pos_in_group(pos.pos));
1167 // We only return const_references because it's really hard to
1168 // return something settable for empty buckets. Use set() instead.
1169 const_reference get(size_type i) const {
1170 assert(i < table_size);
1171 return which_group(i).get(pos_in_group(i));
1174 // TODO(csilvers): make protected + friend element_adaptor
1175 reference mutating_get(size_type i) { // fills bucket i before getting
1176 assert(i < table_size);
1177 size_type old_numbuckets = which_group(i).num_nonempty();
1178 reference retval = which_group(i).mutating_get(pos_in_group(i));
1179 num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1183 // Syntactic sugar. As in sparsegroup, the non-const version is harder
1184 const_reference operator[](size_type i) const {
1188 element_adaptor operator[](size_type i) {
1189 return element_adaptor(this, i);
1192 // Needed for hashtables, gets as a nonempty_iterator. Crashes for empty bcks
1193 const_nonempty_iterator get_iter(size_type i) const {
1194 assert(test(i)); // how can a nonempty_iterator point to an empty bucket?
1195 return const_nonempty_iterator(
1196 groups.begin(), groups.end(),
1197 groups.begin() + group_num(i),
1198 (groups[group_num(i)].nonempty_begin() +
1199 groups[group_num(i)].pos_to_offset(pos_in_group(i))));
1201 // For nonempty we can return a non-const version
1202 nonempty_iterator get_iter(size_type i) {
1203 assert(test(i)); // how can a nonempty_iterator point to an empty bucket?
1204 return nonempty_iterator(
1205 groups.begin(), groups.end(),
1206 groups.begin() + group_num(i),
1207 (groups[group_num(i)].nonempty_begin() +
1208 groups[group_num(i)].pos_to_offset(pos_in_group(i))));
1212 // This returns a reference to the inserted item (which is a copy of val)
1213 // The trick is to figure out whether we're replacing or inserting anew
1214 reference set(size_type i, const_reference val) {
1215 assert(i < table_size);
1216 size_type old_numbuckets = which_group(i).num_nonempty();
1217 reference retval = which_group(i).set(pos_in_group(i), val);
1218 num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1222 // This takes the specified elements out of the table. This is
1223 // "undefining", rather than "clearing".
1224 void erase(size_type i) {
1225 assert(i < table_size);
1226 size_type old_numbuckets = which_group(i).num_nonempty();
1227 which_group(i).erase(pos_in_group(i));
1228 num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1231 void erase(iterator pos) {
1235 void erase(iterator start, iterator end) {
1236 // This could be more efficient, but then we'd need to figure
1237 // out if we spanned groups or not. Doesn't seem worth it.
1238 for ( ; start != end; ++start )
1243 // We support reading and writing tables to disk. We don't store
1244 // the actual array contents (which we don't know how to store),
1245 // just the groups and sizes. Returns true if all went ok.
1248 // Every time the disk format changes, this should probably change too
1249 static const unsigned long MAGIC_NUMBER = 0x24687531;
1251 // Old versions of this code write all data in 32 bits. We need to
1252 // support these files as well as having support for 64-bit systems.
1253 // So we use the following encoding scheme: for values < 2^32-1, we
1254 // store in 4 bytes in big-endian order. For values > 2^32, we
1255 // store 0xFFFFFFF followed by 8 bytes in big-endian order. This
1256 // causes us to mis-read old-version code that stores exactly
1257 // 0xFFFFFFF, but I don't think that is likely to have happened for
1258 // these particular values.
1259 static bool write_32_or_64(FILE* fp, size_type value) {
1260 if ( value < 0xFFFFFFFFULL ) { // fits in 4 bytes
1265 } else if ( value == 0xFFFFFFFFUL ) { // special case in 32bit systems
1266 PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); // marker
1267 PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); PUT_(0, 0);
1268 PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);
1270 PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); // marker
1283 static bool read_32_or_64(FILE* fp, size_type *value) { // reads into value
1284 size_type first4 = 0;
1290 if ( first4 < 0xFFFFFFFFULL ) {
1306 bool write_metadata(FILE *fp) const {
1307 if ( !write_32_or_64(fp, MAGIC_NUMBER) ) return false;
1308 if ( !write_32_or_64(fp, table_size) ) return false;
1309 if ( !write_32_or_64(fp, num_buckets) ) return false;
1311 GroupsConstIterator group;
1312 for ( group = groups.begin(); group != groups.end(); ++group )
1313 if ( group->write_metadata(fp) == false ) return false;
1317 // Reading destroys the old table contents! Returns true if read ok.
1318 bool read_metadata(FILE *fp) {
1319 size_type magic_read = 0;
1320 if ( !read_32_or_64(fp, &magic_read) ) return false;
1321 if ( magic_read != MAGIC_NUMBER ) {
1322 clear(); // just to be consistent
1326 if ( !read_32_or_64(fp, &table_size) ) return false;
1327 if ( !read_32_or_64(fp, &num_buckets) ) return false;
1329 resize(table_size); // so the vector's sized ok
1330 GroupsIterator group;
1331 for ( group = groups.begin(); group != groups.end(); ++group )
1332 if ( group->read_metadata(fp) == false ) return false;
1336 // This code is identical to that for SparseGroup
1337 // If your keys and values are simple enough, we can write them
1338 // to disk for you. "simple enough" means no pointers.
1339 // However, we don't try to normalize endianness
1340 bool write_nopointer_data(FILE *fp) const {
1341 for ( const_nonempty_iterator it = nonempty_begin();
1342 it != nonempty_end(); ++it ) {
1343 if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false;
1348 // When reading, we have to override the potential const-ness of *it
1349 bool read_nopointer_data(FILE *fp) {
1350 for ( nonempty_iterator it = nonempty_begin();
1351 it != nonempty_end(); ++it ) {
1352 if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
1358 // Comparisons. Note the comparisons are pretty arbitrary: we
1359 // compare values of the first index that isn't equal (using default
1360 // value for empty buckets).
1361 bool operator==(const sparsetable& x) const {
1362 return ( table_size == x.table_size &&
1363 num_buckets == x.num_buckets &&
1364 groups == x.groups );
1366 bool operator<(const sparsetable& x) const { // also from algobase.h
1367 return STL_NAMESPACE::lexicographical_compare(begin(), end(),
1368 x.begin(), x.end());
1370 bool operator!=(const sparsetable& x) const { return !(*this == x); }
1371 bool operator<=(const sparsetable& x) const { return *this < x || *this == x; }
1372 bool operator>(const sparsetable& x) const { return x <= *this; }
1373 bool operator>=(const sparsetable& x) const { return x < *this; }
1378 vector< sparsegroup<value_type, GROUP_SIZE> > groups; // our list of groups
1379 size_type table_size; // how many buckets they want
1380 size_type num_buckets; // number of non-empty buckets
1383 // We need a global swap as well
1384 template <class T, uint16_t GROUP_SIZE>
1385 inline void swap(sparsetable<T,GROUP_SIZE> &x, sparsetable<T,GROUP_SIZE> &y) {
1392 _END_GOOGLE_NAMESPACE_