Source/External/google/sparsetable

   1 // Copyright (c) 2005, Google Inc.
   2 // All rights reserved.
   3 //
   4 // Redistribution and use in source and binary forms, with or without
   5 // modification, are permitted provided that the following conditions are
   6 // met:
   7 //
   8 //     * Redistributions of source code must retain the above copyright
   9 // notice, this list of conditions and the following disclaimer.
  10 //     * Redistributions in binary form must reproduce the above
  11 // copyright notice, this list of conditions and the following disclaimer
  12 // in the documentation and/or other materials provided with the
  13 // distribution.
  14 //     * Neither the name of Google Inc. nor the names of its
  15 // contributors may be used to endorse or promote products derived from
  16 // this software without specific prior written permission.
  17 //
  18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30 // ---
  31 // Author: Craig Silverstein
  32 //
  33 // A sparsetable is a random container that implements a sparse array,
  34 // that is, an array that uses very little memory to store unassigned
  35 // indices (in this case, between 1-2 bits per unassigned index).  For
  36 // instance, if you allocate an array of size 5 and assign a[2] = <big
  37 // struct>, then a[2] will take up a lot of memory but a[0], a[1],
  38 // a[3], and a[4] will not.  Array elements that have a value are
  39 // called "assigned".  Array elements that have no value yet, or have
  40 // had their value cleared using erase() or clear(), are called
  41 // "unassigned".
  42 //
  43 // Unassigned values seem to have the default value of T (see below).
  44 // Nevertheless, there is a difference between an unassigned index and
  45 // one explicitly assigned the value of T().  The latter is considered
  46 // assigned.
  47 //
  48 // Access to an array element is constant time, as is insertion and
  49 // deletion.  Insertion and deletion may be fairly slow, however:
  50 // because of this container's memory economy, each insert and delete
  51 // causes a memory reallocation.
  52 //
  53 // See /usr/(local/)?doc/sparsehash-0.1/sparsetable.html
  54 // for information about how to use this class.
  55
  56 #ifndef _SPARSETABLE_H_
  57 #define _SPARSETABLE_H_
  58
  59 // !!! DR changed some <google/...> to "..." due to include path problems...
  60 #include "sparsehash/google_config.h"
  61 #include <stdlib.h>             // for malloc/free
  62 #include <stdio.h>              // to read/write tables
  63 #if defined HAVE_STDINT_H
  64 #include <stdint.h>             // to get uint16_t (ISO naming madness)
  65 #elif defined HAVE_INTTYPES_H
  66 #include <inttypes.h>           // another place uint16_t might be defined
  67 #else
  68 #include <sys/types.h>          // our last best hope
  69 #endif
  70 #include <assert.h>             // for bounds checking
  71 #include <iterator>             // to define reverse_iterator<> for me
  72 #include <vector>               // a sparsetable is a vector of groups
  73
  74 #if STDC_HEADERS
  75 #include <string.h>             // for memcpy and mmemmove
  76 #else
  77 #if !HAVE_MEMCPY
  78 #define memcpy(d, s, n)   bcopy ((s), (d), (n))
  79 #define memmove(d, s, n)  bcopy ((s), (d), (n))
  80 #endif
  81 #endif
  82
  83 #if !defined HAVE_U_INT16_T && defined HAVE___UINT16
  84 typedef __uint16 u_int16_t        // true on vc++7
  85 #endif
  86
  87 _START_GOOGLE_NAMESPACE_
  88
  89 using STL_NAMESPACE::vector;
  90
  91 // The smaller this is, the faster lookup is (because the group bitmap is
  92 // smaller) and the faster insert is, because there's less to memmove.
  93 // On the other hand, there are more groups.  Since group::size_type is
  94 // a short, this number should be of the form 32*x + 16 to avoid waste.
  95 static const uint16_t DEFAULT_SPARSEGROUP_SIZE = 48;   // fits in 1.5 words
  96
  97
  98 // A NOTE ON ASSIGNING:
  99 // A sparse table does not actually allocate memory for entries
 100 // that are not filled.  Because of this, it becomes complicated
 101 // to have a non-const iterator: we don't know, if the iterator points
 102 // to a not-filled bucket, whether you plan to fill it with something
 103 // or whether you plan to read its value (in which case you'll get
 104 // the default bucket value).  Therefore, while we can define const
 105 // operations in a pretty 'normal' way, for non-const operations, we
 106 // define something that returns a helper object with operator= and
 107 // operator& that allocate a bucket lazily.  We use this for table[]
 108 // and also for regular table iterators.
 109
 110 template <class tabletype>
 111 class table_element_adaptor {
 112  public:
 113   typedef typename tabletype::value_type value_type;
 114   typedef typename tabletype::size_type size_type;
 115   typedef typename tabletype::reference reference;
 116   typedef typename tabletype::pointer pointer;
 117
 118   table_element_adaptor(tabletype *tbl, size_type p)
 119     : table(tbl), pos(p) { }
 120   table_element_adaptor& operator= (value_type val) {
 121     table->set(pos, val);
 122     return *this;
 123   }
 124   operator value_type() { return table->get(pos); }   // we look like a value
 125   pointer operator& () { return &table->mutating_get(pos); }
 126
 127  private:
 128   tabletype* table;
 129   size_type pos;
 130 };
 131
 132 // Our iterator as simple as iterators can be: basically it's just
 133 // the index into our table.  Dereference, the only complicated
 134 // thing, we punt to the table class.  This just goes to show how
 135 // much machinery STL requires to do even the most trivial tasks.
 136 //
 137 // By templatizing over tabletype, we have one iterator type which
 138 // we can use for both sparsetables and sparsebins.  In fact it
 139 // works on any class that allows size() and operator[] (eg vector),
 140 // as long as it does the standard STL typedefs too (eg value_type).
 141
 142 template <class tabletype>
 143 class table_iterator {
 144  public:
 145   typedef table_iterator iterator;
 146
 147 #ifdef UNDERSTANDS_ITERATOR_TAGS
 148   typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
 149 #endif
 150   typedef typename tabletype::value_type value_type;
 151   typedef typename tabletype::difference_type difference_type;
 152   typedef typename tabletype::size_type size_type;
 153   typedef table_element_adaptor<tabletype> reference;
 154   typedef table_element_adaptor<tabletype>* pointer;
 155
 156   // The "real" constructor
 157   table_iterator(tabletype *tbl, size_type p)
 158     : table(tbl), pos(p) { }
 159   // The default constructor, used when I define vars of type table::iterator
 160   table_iterator() : table(NULL), pos(0) { }
 161   // The copy constructor, for when I say table::iterator foo = tbl.begin()
 162   // The default destructor is fine; we don't define one
 163   // The default operator= is fine; we don't define one
 164
 165   // The main thing our iterator does is dereference.  If the table entry
 166   // we point to is empty, we return the default value type.
 167   // This is the big different function from the const iterator.
 168   reference operator*()              {
 169     return table_element_adaptor<tabletype>(table, pos);
 170   }
 171   pointer operator->()               { return &(operator*()); }
 172
 173   // Helper function to assert things are ok; eg pos is still in range
 174   void check() const {
 175     assert(table);
 176     assert(pos <= table->size());
 177   }
 178
 179   // Arithmetic: we just do arithmetic on pos.  We don't even need to
 180   // do bounds checking, since STL doesn't consider that it's job.  :-)
 181   iterator& operator+=(size_type t) { pos += t; check(); return *this; }
 182   iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
 183   iterator& operator++()            { ++pos; check(); return *this; }
 184   iterator& operator--()            { --pos; check(); return *this; }
 185   iterator operator++(int)          { iterator tmp(*this);     // for x++
 186                                       ++pos; check(); return tmp; }
 187   iterator operator--(int)          { iterator tmp(*this);     // for x--
 188                                       --pos; check(); return tmp; }
 189   iterator operator+(difference_type i) const  { iterator tmp(*this);
 190                                                  tmp += i; return tmp; }
 191   iterator operator-(difference_type i) const  { iterator tmp(*this);
 192                                                  tmp -= i; return tmp; }
 193   difference_type operator-(iterator it) const {      // for "x = it2 - it"
 194     assert(table == it.table);
 195     return pos - it.pos;
 196   }
 197   reference operator[](difference_type n) const {
 198     return *(*this + n);            // simple though not totally efficient
 199   }
 200
 201   // Comparisons.
 202   bool operator==(const iterator& it) const {
 203     return table == it.table && pos == it.pos;
 204   }
 205   bool operator<(const iterator& it) const {
 206     assert(table == it.table);              // life is bad bad bad otherwise
 207     return pos < it.pos;
 208   }
 209   bool operator<=(const iterator& it) const {
 210     assert(table == it.table);
 211     return pos <= it.pos;
 212   }
 213   bool operator!=(const iterator& it) const { return !(*this == it); }
 214   bool operator>(const iterator& it) const { return it <= *this; }
 215   bool operator>=(const iterator& it) const { return it < *this; }
 216
 217   // Here's the info we actually need to be an iterator
 218   tabletype *table;              // so we can dereference and bounds-check
 219   size_type pos;                 // index into the table
 220 };
 221
 222 // support for "3 + iterator" has to be defined outside the class, alas
 223 template<class T>
 224 table_iterator<T> operator+(typename table_iterator<T>::difference_type i,
 225                             table_iterator<T> it) {
 226   return it + i;               // so people can say it2 = 3 + it
 227 }
 228
 229 template <class tabletype>
 230 class const_table_iterator {
 231  public:
 232   typedef table_iterator<tabletype> iterator;
 233   typedef const_table_iterator const_iterator;
 234
 235 #ifdef UNDERSTANDS_ITERATOR_TAGS
 236   typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
 237 #endif
 238   typedef typename tabletype::value_type value_type;
 239   typedef typename tabletype::difference_type difference_type;
 240   typedef typename tabletype::size_type size_type;
 241   typedef typename tabletype::const_reference reference;  // we're const-only
 242   typedef typename tabletype::const_pointer pointer;
 243
 244   // The "real" constructor
 245   const_table_iterator(const tabletype *tbl, size_type p)
 246     : table(tbl), pos(p) { }
 247   // The default constructor, used when I define vars of type table::iterator
 248   const_table_iterator() : table(NULL), pos(0) { }
 249   // The copy constructor, for when I say table::iterator foo = tbl.begin()
 250   // Also converts normal iterators to const iterators
 251   const_table_iterator(const iterator &from)
 252     : table(from.table), pos(from.pos) { }
 253   // The default destructor is fine; we don't define one
 254   // The default operator= is fine; we don't define one
 255
 256   // The main thing our iterator does is dereference.  If the table entry
 257   // we point to is empty, we return the default value type.
 258   reference operator*() const       { return (*table)[pos]; }
 259   pointer operator->() const        { return &(operator*()); }
 260
 261   // Helper function to assert things are ok; eg pos is still in range
 262   void check() const {
 263     assert(table);
 264     assert(pos <= table->size());
 265   }
 266
 267   // Arithmetic: we just do arithmetic on pos.  We don't even need to
 268   // do bounds checking, since STL doesn't consider that it's job.  :-)
 269   const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
 270   const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
 271   const_iterator& operator++()            { ++pos; check(); return *this; }
 272   const_iterator& operator--()            { --pos; check(); return *this; }
 273   const_iterator operator++(int)          { const_iterator tmp(*this); // for x++
 274                                             ++pos; check(); return tmp; }
 275   const_iterator operator--(int)          { const_iterator tmp(*this); // for x--
 276                                             --pos; check(); return tmp; }
 277   const_iterator operator+(difference_type i) const  { const_iterator tmp(*this);
 278                                                        tmp += i; return tmp; }
 279   const_iterator operator-(difference_type i) const  { const_iterator tmp(*this);
 280                                                        tmp -= i; return tmp; }
 281   difference_type operator-(const_iterator it) const {   // for "x = it2 - it"
 282     assert(table == it.table);
 283     return pos - it.pos;
 284   }
 285   reference operator[](difference_type n) const {
 286     return *(*this + n);            // simple though not totally efficient
 287   }
 288
 289   // Comparisons.
 290   bool operator==(const const_iterator& it) const {
 291     return table == it.table && pos == it.pos;
 292   }
 293   bool operator<(const const_iterator& it) const {
 294     assert(table == it.table);              // life is bad bad bad otherwise
 295     return pos < it.pos;
 296   }
 297   bool operator<=(const const_iterator& it) const {
 298     assert(table == it.table);
 299     return pos <= it.pos;
 300   }
 301   bool operator!=(const const_iterator& it) const { return !(*this == it); }
 302   bool operator>(const const_iterator& it) const { return it <= *this; }
 303   bool operator>=(const const_iterator& it) const { return it < *this; }
 304
 305   // Here's the info we actually need to be an iterator
 306   const tabletype *table;        // so we can dereference and bounds-check
 307   size_type pos;                 // index into the table
 308 };
 309
 310 // support for "3 + iterator" has to be defined outside the class, alas
 311 template<class T>
 312 const_table_iterator<T> operator+(typename
 313                                   const_table_iterator<T>::difference_type i,
 314                                   const_table_iterator<T> it) {
 315   return it + i;               // so people can say it2 = 3 + it
 316 }
 317
 318
 319 // ---------------------------------------------------------------------------
 320
 321
 322 /*
 323 // This is a 2-D iterator.  You specify a begin and end over a list
 324 // of *containers*.  We iterate over each container by iterating over
 325 // it.  It's actually simple:
 326 // VECTOR.begin() VECTOR[0].begin()  --------> VECTOR[0].end() ---\
 327 //     |          ________________________________________________/
 328 //     |          \_> VECTOR[1].begin()  -------->  VECTOR[1].end() -\
 329 //     |          ___________________________________________________/
 330 //     v          \_> ......
 331 // VECTOR.end()
 332 //
 333 // It's impossible to do random access on one of these things in constant
 334 // time, so it's just a bidirectional iterator.
 335 //
 336 // Unfortunately, because we need to use this for a non-empty iterator,
 337 // we use nonempty_begin() and nonempty_end() instead of begin() and end()
 338 // (though only going across, not down).
 339 */
 340
 341 #define TWOD_BEGIN_      nonempty_begin
 342 #define TWOD_END_        nonempty_end
 343 #define TWOD_ITER_       nonempty_iterator
 344 #define TWOD_CONST_ITER_ const_nonempty_iterator
 345
 346 template <class containertype>
 347 class two_d_iterator {
 348  public:
 349   typedef two_d_iterator iterator;
 350
 351 #ifdef UNDERSTANDS_ITERATOR_TAGS
 352   typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
 353 #endif
 354   // apparently some versions of VC++ have trouble with two ::'s in a typename
 355   typedef typename containertype::value_type _tmp_vt;
 356   typedef typename _tmp_vt::value_type value_type;
 357   typedef typename _tmp_vt::difference_type difference_type;
 358   typedef typename _tmp_vt::reference reference;
 359   typedef typename _tmp_vt::pointer pointer;
 360
 361   // The "real" constructor.  begin and end specify how many rows we have
 362   // (in the diagram above); we always iterate over each row completely.
 363   two_d_iterator(typename containertype::iterator begin,
 364                  typename containertype::iterator end,
 365                  typename containertype::iterator curr)
 366     : row_begin(begin), row_end(end), row_current(curr), col_current() {
 367     if ( row_current != row_end ) {
 368       col_current = row_current->TWOD_BEGIN_();
 369       advance_past_end();                 // in case cur->begin() == cur->end()
 370     }
 371   }
 372   // If you want to start at an arbitrary place, you can, I guess
 373   two_d_iterator(typename containertype::iterator begin,
 374                  typename containertype::iterator end,
 375                  typename containertype::iterator curr,
 376                  typename containertype::value_type::TWOD_ITER_ col)
 377     : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
 378     advance_past_end();                 // in case cur->begin() == cur->end()
 379   }
 380   // The default constructor, used when I define vars of type table::iterator
 381   two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { }
 382   // The default destructor is fine; we don't define one
 383   // The default operator= is fine; we don't define one
 384
 385   // Happy dereferencer
 386   reference operator*() const    { return *col_current; }
 387   pointer operator->() const     { return &(operator*()); }
 388
 389   // Arithmetic: we just do arithmetic on pos.  We don't even need to
 390   // do bounds checking, since STL doesn't consider that it's job.  :-)
 391   // NOTE: this is not amortized constant time!  What do we do about it?
 392   void advance_past_end() {          // used when col_current points to end()
 393     while ( col_current == row_current->TWOD_END_() ) {  // end of current row
 394       ++row_current;                                // go to beginning of next
 395       if ( row_current != row_end )                 // col is irrelevant at end
 396         col_current = row_current->TWOD_BEGIN_();
 397       else
 398         break;                                      // don't go past row_end
 399     }
 400   }
 401
 402   iterator& operator++() {
 403     assert(row_current != row_end);                 // how to ++ from there?
 404     ++col_current;
 405     advance_past_end();                 // in case col_current is at end()
 406     return *this;
 407   }
 408   iterator& operator--() {
 409     while ( row_current == row_end ||
 410             col_current == row_current->TWOD_BEGIN_() ) {
 411       assert(row_current != row_begin);
 412       --row_current;
 413       col_current = row_current->TWOD_END_();             // this is 1 too far
 414     }
 415     --col_current;
 416     return *this;
 417   }
 418   iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
 419   iterator operator--(int)       { iterator tmp(*this); --*this; return tmp; }
 420
 421
 422   // Comparisons.
 423   bool operator==(const iterator& it) const {
 424     return ( row_begin == it.row_begin &&
 425              row_end == it.row_end &&
 426              row_current == it.row_current &&
 427              (row_current == row_end || col_current == it.col_current) );
 428   }
 429   bool operator!=(const iterator& it) const { return !(*this == it); }
 430
 431
 432   // Here's the info we actually need to be an iterator
 433   // These need to be public so we convert from iterator to const_iterator
 434   typename containertype::iterator row_begin, row_end, row_current;
 435   typename containertype::value_type::TWOD_ITER_ col_current;
 436 };
 437
 438 // The same thing again, but this time const.  :-(
 439 template <class containertype>
 440 class const_two_d_iterator {
 441  public:
 442   typedef const_two_d_iterator iterator;
 443
 444 #ifdef UNDERSTANDS_ITERATOR_TAGS
 445   typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
 446 #endif
 447   // apparently some versions of VC++ have trouble with two ::'s in a typename
 448   typedef typename containertype::value_type _tmp_vt;
 449   typedef typename _tmp_vt::value_type value_type;
 450   typedef typename _tmp_vt::difference_type difference_type;
 451   typedef typename _tmp_vt::const_reference reference;
 452   typedef typename _tmp_vt::const_pointer pointer;
 453
 454   const_two_d_iterator(typename containertype::const_iterator begin,
 455                        typename containertype::const_iterator end,
 456                        typename containertype::const_iterator curr)
 457     : row_begin(begin), row_end(end), row_current(curr), col_current() {
 458     if ( curr != end ) {
 459       col_current = curr->TWOD_BEGIN_();
 460       advance_past_end();                 // in case cur->begin() == cur->end()
 461     }
 462   }
 463   const_two_d_iterator(typename containertype::const_iterator begin,
 464                        typename containertype::const_iterator end,
 465                        typename containertype::const_iterator curr,
 466                        typename containertype::value_type::TWOD_CONST_ITER_ col)
 467     : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
 468     advance_past_end();                 // in case cur->begin() == cur->end()
 469   }
 470   const_two_d_iterator()
 471     : row_begin(), row_end(), row_current(), col_current() {
 472   }
 473   // Need this explicitly so we can convert normal iterators to const iterators
 474   const_two_d_iterator(const two_d_iterator<containertype>& it) :
 475     row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current),
 476     col_current(it.col_current) { }
 477
 478   typename containertype::const_iterator row_begin, row_end, row_current;
 479   typename containertype::value_type::TWOD_CONST_ITER_ col_current;
 480
 481
 482   // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR
 483   reference operator*() const    { return *col_current; }
 484   pointer operator->() const     { return &(operator*()); }
 485
 486   void advance_past_end() {          // used when col_current points to end()
 487     while ( col_current == row_current->TWOD_END_() ) {  // end of current row
 488       ++row_current;                                // go to beginning of next
 489       if ( row_current != row_end )                 // col is irrelevant at end
 490         col_current = row_current->TWOD_BEGIN_();
 491       else
 492         break;                                      // don't go past row_end
 493     }
 494   }
 495   iterator& operator++() {
 496     assert(row_current != row_end);                 // how to ++ from there?
 497     ++col_current;
 498     advance_past_end();                 // in case col_current is at end()
 499     return *this;
 500   }
 501   iterator& operator--() {
 502     while ( row_current == row_end ||
 503             col_current == row_current->TWOD_BEGIN_() ) {
 504       assert(row_current != row_begin);
 505       --row_current;
 506       col_current = row_current->TWOD_END_();             // this is 1 too far
 507     }
 508     --col_current;
 509     return *this;
 510   }
 511   iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
 512   iterator operator--(int)       { iterator tmp(*this); --*this; return tmp; }
 513
 514   bool operator==(const iterator& it) const {
 515     return ( row_begin == it.row_begin &&
 516              row_end == it.row_end &&
 517              row_current == it.row_current &&
 518              (row_current == row_end || col_current == it.col_current) );
 519   }
 520   bool operator!=(const iterator& it) const { return !(*this == it); }
 521 };
 522
 523 // We provide yet another version, to be as frugal with memory as
 524 // possible.  This one frees each block of memory as it finishes
 525 // iterating over it.  By the end, the entire table is freed.
 526 // For understandable reasons, you can only iterate over it once,
 527 // which is why it's an input iterator
 528 template <class containertype>
 529 class destructive_two_d_iterator {
 530  public:
 531   typedef destructive_two_d_iterator iterator;
 532
 533 #ifdef UNDERSTANDS_ITERATOR_TAGS
 534   typedef STL_NAMESPACE::input_iterator_tag iterator_category;
 535 #endif
 536   // apparently some versions of VC++ have trouble with two ::'s in a typename
 537   typedef typename containertype::value_type _tmp_vt;
 538   typedef typename _tmp_vt::value_type value_type;
 539   typedef typename _tmp_vt::difference_type difference_type;
 540   typedef typename _tmp_vt::reference reference;
 541   typedef typename _tmp_vt::pointer pointer;
 542
 543   destructive_two_d_iterator(typename containertype::iterator begin,
 544                              typename containertype::iterator end,
 545                              typename containertype::iterator curr)
 546     : row_begin(begin), row_end(end), row_current(curr), col_current() {
 547     if ( curr != end ) {
 548       col_current = curr->TWOD_BEGIN_();
 549       advance_past_end();                 // in case cur->begin() == cur->end()
 550     }
 551   }
 552   destructive_two_d_iterator(typename containertype::iterator begin,
 553                              typename containertype::iterator end,
 554                              typename containertype::iterator curr,
 555                              typename containertype::value_type::TWOD_ITER_ col)
 556     : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
 557     advance_past_end();                 // in case cur->begin() == cur->end()
 558   }
 559   destructive_two_d_iterator()
 560     : row_begin(), row_end(), row_current(), col_current() {
 561   }
 562
 563   typename containertype::iterator row_begin, row_end, row_current;
 564   typename containertype::value_type::TWOD_ITER_ col_current;
 565
 566   // This is the part that destroys
 567   void advance_past_end() {          // used when col_current points to end()
 568     while ( col_current == row_current->TWOD_END_() ) {  // end of current row
 569       row_current->clear();                         // the destructive part
 570       // It would be nice if we could decrement sparsetable->num_buckets here
 571       ++row_current;                                // go to beginning of next
 572       if ( row_current != row_end )                 // col is irrelevant at end
 573         col_current = row_current->TWOD_BEGIN_();
 574       else
 575         break;                                      // don't go past row_end
 576     }
 577   }
 578
 579   // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
 580   reference operator*() const    { return *col_current; }
 581   pointer operator->() const     { return &(operator*()); }
 582
 583   iterator& operator++() {
 584     assert(row_current != row_end);                 // how to ++ from there?
 585     ++col_current;
 586     advance_past_end();                 // in case col_current is at end()
 587     return *this;
 588   }
 589   iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
 590
 591   bool operator==(const iterator& it) const {
 592     return ( row_begin == it.row_begin &&
 593              row_end == it.row_end &&
 594              row_current == it.row_current &&
 595              (row_current == row_end || col_current == it.col_current) );
 596   }
 597   bool operator!=(const iterator& it) const { return !(*this == it); }
 598 };
 599
 600 #undef TWOD_BEGIN_
 601 #undef TWOD_END_
 602 #undef TWOD_ITER_
 603 #undef TWOD_CONST_ITER_
 604
 605
 606 \f
 607
 608 // SPARSE-TABLE
 609 // ------------
 610 // The idea is that a table with (logically) t buckets is divided
 611 // into t/M *groups* of M buckets each.  (M is a constant set in
 612 // GROUP_SIZE for efficiency.)  Each group is stored sparsely.
 613 // Thus, inserting into the table causes some array to grow, which is
 614 // slow but still constant time.  Lookup involves doing a
 615 // logical-position-to-sparse-position lookup, which is also slow but
 616 // constant time.  The larger M is, the slower these operations are
 617 // but the less overhead (slightly).
 618 //
 619 // To store the sparse array, we store a bitmap B, where B[i] = 1 iff
 620 // bucket i is non-empty.  Then to look up bucket i we really look up
 621 // array[# of 1s before i in B].  This is constant time for fixed M.
 622 //
 623 // Terminology: the position of an item in the overall table (from
 624 // 1 .. t) is called its "location."  The logical position in a group
 625 // (from 1 .. M ) is called its "position."  The actual location in
 626 // the array (from 1 .. # of non-empty buckets in the group) is
 627 // called its "offset."
 628
 629 // The weird mod in the offset is entirely to quiet compiler warnings
 630 #define PUT_(take_from, offset)                                              \
 631   if ( putc(((take_from) >> ((offset) % (sizeof(take_from)*8))) % 256, fp)   \
 632        == EOF )                                                              \
 633     return false
 634
 635 #define GET_(add_to, offset)                            \
 636   if ((x=getc(fp)) == EOF)                              \
 637     return false;                                       \
 638   else                                                  \
 639     add_to |= (static_cast<size_type>(x) << ((offset) % (sizeof(add_to)*8)))
 640
 641
 642 template <class T, uint16_t GROUP_SIZE>
 643 class sparsegroup {
 644  public:
 645   // Basic types
 646   typedef T value_type;
 647   typedef value_type* pointer;
 648   typedef const value_type* const_pointer;
 649   typedef table_iterator<sparsegroup<T, GROUP_SIZE> > iterator;
 650   typedef const_table_iterator<sparsegroup<T, GROUP_SIZE> > const_iterator;
 651   typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE> > element_adaptor;
 652   typedef value_type &reference;
 653   typedef const value_type &const_reference;
 654   typedef uint16_t size_type;                  // max # of buckets
 655   typedef int16_t difference_type;
 656   typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
 657   typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
 658
 659   // These are our special iterators, that go over non-empty buckets in a
 660   // group.  These aren't const-only because you can change non-empty bcks.
 661   typedef pointer nonempty_iterator;
 662   typedef const_pointer const_nonempty_iterator;
 663   typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
 664   typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
 665
 666   // Iterator functions
 667   iterator begin()                      { return iterator(this, 0); }
 668   const_iterator begin() const          { return const_iterator(this, 0); }
 669   iterator end()                        { return iterator(this, size()); }
 670   const_iterator end() const            { return const_iterator(this, size()); }
 671   reverse_iterator rbegin()             { return reverse_iterator(end()); }
 672   const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
 673   reverse_iterator rend()               { return reverse_iterator(begin()); }
 674   const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
 675
 676   // We'll have versions for our special non-empty iterator too
 677   nonempty_iterator nonempty_begin()             { return group; }
 678   const_nonempty_iterator nonempty_begin() const { return group; }
 679   nonempty_iterator nonempty_end()               { return group + num_buckets; }
 680   const_nonempty_iterator nonempty_end() const   { return group + num_buckets; }
 681   reverse_nonempty_iterator nonempty_rbegin() {
 682     return reverse_nonempty_iterator(nonempty_end());
 683   }
 684   const_reverse_nonempty_iterator nonempty_rbegin() const {
 685     return const_reverse_nonempty_iterator(nonempty_end());
 686   }
 687   reverse_nonempty_iterator nonempty_rend() {
 688     return reverse_nonempty_iterator(nonempty_begin());
 689   }
 690   const_reverse_nonempty_iterator nonempty_rend() const {
 691     return const_reverse_nonempty_iterator(nonempty_begin());
 692   }
 693
 694
 695   // This gives us the "default" value to return for an empty bucket.
 696   // We just use the default constructor on T, the template type
 697   const_reference default_value() const {
 698     static value_type defaultval = value_type();
 699     return defaultval;
 700   }
 701
 702
 703  private:
 704   // We need to do all this bit manipulation, of course.  ick
 705   static size_type charbit(size_type i)  { return i >> 3; }
 706   static size_type modbit(size_type i)   { return 1 << (i&7); }
 707   bool bmtest(size_type i) const   { return bitmap[charbit(i)] & modbit(i); }
 708   void bmset(size_type i)          { bitmap[charbit(i)] |= modbit(i); }
 709   void bmclear(size_type i)        { bitmap[charbit(i)] &= ~modbit(i); }
 710
 711   // malloc / realloc that dies if allocation fails
 712   void* realloc_or_die(void* ptr, size_t num_bytes) {
 713     void* retval = realloc(ptr, num_bytes);
 714     if (retval == NULL) {
 715       fprintf(stderr, "FATAL ERROR: failed to allocate %d bytes for ptr %p",
 716               num_bytes, ptr);
 717       exit(1);
 718     }
 719     return retval;
 720   }
 721
 722   void* malloc_or_die(size_t num_bytes) {
 723     return realloc_or_die(NULL, num_bytes);
 724   }
 725
 726  public:                         // get_iter() in sparsetable needs it
 727   // We need a small function that tells us how many set bits there are
 728   // in positions 0..i-1 of the bitmap.  It uses a big table.
 729   // We make it static so templates don't allocate lots of these tables
 730   static size_type pos_to_offset(const unsigned char *bm, size_type pos) {
 731     // We could make these ints.  The tradeoff is size (eg does it overwhelm
 732     // the cache?) vs efficiency in referencing sub-word-sized array elements
 733     static const char bits_in[256] = {      // # of bits set in one char
 734       0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
 735       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 736       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 737       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 738       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 739       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 740       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 741       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 742       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 743       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 744       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 745       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 746       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 747       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 748       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 749       4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
 750     };
 751     size_type retval = 0;
 752
 753     // [Note: condition pos > 8 is an optimization; convince yourself we
 754     // give exactly the same result as if we had pos >= 8 here instead.]
 755     for ( ; pos > 8; pos -= 8 )                    // bm[0..pos/8-1]
 756       retval += bits_in[*bm++];                    // chars we want *all* bits in
 757     return retval + bits_in[*bm & ((1 << pos)-1)]; // the char that includes pos
 758   }
 759
 760   size_type pos_to_offset(size_type pos) const {   // not static but still const
 761     return pos_to_offset(bitmap, pos);
 762   }
 763
 764
 765  public:
 766   // Constructors -- default and copy -- and destructor
 767   sparsegroup() : group(0), num_buckets(0) { memset(bitmap, 0, sizeof(bitmap)); }
 768   sparsegroup(const sparsegroup& x) : group(0), num_buckets(x.num_buckets) {
 769     if ( num_buckets ) {
 770       group = (value_type *)malloc_or_die(x.num_buckets * sizeof(*group));
 771       memcpy(group, x.group, x.num_buckets * sizeof(*group));
 772     }
 773     memcpy(bitmap, x.bitmap, sizeof(bitmap));
 774   }
 775   ~sparsegroup() { free(group); }       // free(NULL) does the right thing
 776
 777   // Operator= is just like the copy constructor, I guess
 778   sparsegroup &operator=(const sparsegroup& x) {
 779     if ( &x == this ) return *this;                    // x = x
 780     if ( x.num_buckets == 0 ) {
 781       free(group);
 782       group = NULL;
 783     } else {
 784       group = (value_type *)
 785               realloc_or_die(group, x.num_buckets * sizeof(*group));
 786       memcpy(group, x.group, x.num_buckets * sizeof(*group));
 787     }
 788     memcpy(bitmap, x.bitmap, sizeof(bitmap));
 789     num_buckets = x.num_buckets;
 790     return *this;
 791   }
 792
 793   // Many STL algorithms use swap instead of copy constructors
 794   void swap(sparsegroup& x) {
 795     STL_NAMESPACE::swap(group, x.group);
 796     for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i )
 797       STL_NAMESPACE::swap(bitmap[i], x.bitmap[i]);  // swap not defined on arrays
 798     STL_NAMESPACE::swap(num_buckets, x.num_buckets);
 799   }
 800
 801   // It's always nice to be able to clear a table without deallocating it
 802   void clear() {
 803     if ( group )
 804       free(group);
 805     group = NULL;
 806     memset(bitmap, 0, sizeof(bitmap));
 807     num_buckets = 0;
 808   }
 809
 810   // Functions that tell you about size.  Alas, these aren't so useful
 811   // because our table is always fixed size.
 812   size_type size() const           { return GROUP_SIZE; }
 813   size_type max_size() const       { return GROUP_SIZE; }
 814   bool empty() const               { return false; }
 815   // We also may want to know how many *used* buckets there are
 816   size_type num_nonempty() const   { return num_buckets; }
 817
 818
 819   // get()/set() are explicitly const/non-const.  You can use [] if
 820   // you want something that can be either (potentially more expensive).
 821   const_reference get(size_type i) const {
 822     if ( bmtest(i) )           // bucket i is occupied
 823       return group[pos_to_offset(bitmap, i)];
 824     else
 825       return default_value();  // return the default reference
 826   }
 827
 828   // TODO(csilvers): make protected + friend
 829   reference mutating_get(size_type i) {    // fills bucket i before getting
 830     if ( !bmtest(i) )
 831       set(i, default_value());
 832     return group[pos_to_offset(bitmap, i)];
 833   }
 834
 835   // Syntactic sugar.  It's easy to return a const reference.  To
 836   // return a non-const reference, we need to use the assigner adaptor.
 837   const_reference operator[](size_type i) const {
 838     return get(i);
 839   }
 840
 841   element_adaptor operator[](size_type i) {
 842     return element_adaptor(this, i);
 843   }
 844
 845   // This returns a reference to the inserted item (which is a copy of val)
 846   reference set(size_type i, const_reference val) {
 847     size_type offset = pos_to_offset(bitmap, i);  // where we'll find (or insert)
 848     if ( !bmtest(i) ) {                           // make space at group[offset]
 849       // We know the realloc and memmove are safe because we ensure
 850       // value_type is a Plain Old Data Type (see below)
 851       group = (value_type *)
 852               realloc_or_die(group, sizeof(*group) * ++num_buckets);
 853       memmove(group + offset+1, group + offset,
 854               (num_buckets-1 - offset) * sizeof(*group));
 855       bmset(i);
 856     }
 857     // This does the actual inserting.  Since we made the array using
 858     // malloc, we use "placement new" to just call the constructor.
 859     // We really should destruct this later, via group[offset].~value_type(),
 860     // every time we free(), but we don't because it's a POD type.
 861     new(&group[offset]) value_type(val);
 862     return group[offset];
 863   }
 864
 865   // We let you see if a bucket is non-empty without retrieving it
 866   bool test(size_type i) const {
 867     return bmtest(i);
 868   }
 869   bool test(iterator pos) const {
 870     return bmtest(pos.pos);
 871   }
 872
 873   // This takes the specified elements out of the group.  This is
 874   // "undefining", rather than "clearing".
 875   void erase(size_type i) {
 876     if ( bmtest(i) ) {                           // trivial to erase empty bucket
 877       size_type offset = pos_to_offset(bitmap,i); // where we'll find (or insert)
 878       if ( --num_buckets == 0 ) {
 879         free(group);
 880         group = NULL;
 881       } else {
 882         memmove(group + offset, group + offset+1,
 883                 (num_buckets - offset) * sizeof(*group));
 884         group = (value_type *)
 885                 realloc_or_die(group, sizeof(*group) * num_buckets);
 886       }
 887       bmclear(i);
 888     }
 889     }
 890
 891   void erase(iterator pos) {
 892     erase(pos.pos);
 893   }
 894
 895   void erase(iterator start, iterator end) {
 896     // This could be more efficient, but to do so we'd need to make
 897     // bmclear() clear a range of indices.  Doesn't seem worth it.
 898     for ( ; start != end; ++start )
 899       erase(start);
 900   }
 901
 902
 903   // I/O
 904   // We support reading and writing groups to disk.  We don't store
 905   // the actual array contents (which we don't know how to store),
 906   // just the bitmap and size.  Meant to be used with table I/O.
 907   // Returns true if all was ok
 908   bool write_metadata(FILE *fp) const {
 909     assert(sizeof(num_buckets) == 2);     // we explicitly set to u_int16_t
 910     PUT_(num_buckets, 8);
 911     PUT_(num_buckets, 0);
 912     if ( !fwrite(bitmap, sizeof(bitmap), 1, fp) )  return false;
 913     return true;
 914   }
 915
 916   // Reading destroys the old group contents!  Returns true if all was ok
 917   bool read_metadata(FILE *fp) {
 918     clear();
 919
 920     int x;          // the GET_ macro requires an 'int x' to be defined
 921     GET_(num_buckets, 8);
 922     GET_(num_buckets, 0);
 923
 924     if ( !fread(bitmap, sizeof(bitmap), 1, fp) )  return false;
 925
 926     // We'll allocate the space; you'd better fill it somehow!
 927     group = (value_type *)malloc(num_buckets * sizeof(*group));
 928     if ( group == NULL )  return false;
 929     return true;
 930   }
 931
 932   // If your keys and values are simple enough, we can write them
 933   // to disk for you.  "simple enough" means no pointers.
 934   // However, we don't try to normalize endianness
 935   bool write_nopointer_data(FILE *fp) const {
 936     for ( const_nonempty_iterator it = nonempty_begin();
 937           it != nonempty_end(); ++it ) {
 938       if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
 939     }
 940     return true;
 941   }
 942
 943   // When reading, we have to override the potential const-ness of *it
 944   bool read_nopointer_data(FILE *fp) {
 945     for ( nonempty_iterator it = nonempty_begin();
 946           it != nonempty_end(); ++it ) {
 947       if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
 948         return false;
 949     }
 950     return true;
 951   }
 952
 953   // Comparisons.  Note the comparisons are pretty arbitrary: we
 954   // compare values of the first index that isn't equal (using default
 955   // value for empty buckets).
 956   bool operator==(const sparsegroup& x) const {
 957     return ( num_buckets == x.num_buckets &&
 958              memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
 959              STL_NAMESPACE::equal(begin(), end(), x.begin()) ); // from algorithm
 960   }
 961   bool operator<(const sparsegroup& x) const {      // also from algorithm
 962     return STL_NAMESPACE::lexicographical_compare(begin(), end(),
 963                                                   x.begin(), x.end());
 964   }
 965   bool operator!=(const sparsegroup& x) const { return !(*this == x); }
 966   bool operator<=(const sparsegroup& x) const { return *this < x || *this == x; }
 967   bool operator>(const sparsegroup& x) const { return x <= *this; }
 968   bool operator>=(const sparsegroup& x) const { return x < *this; }
 969
 970  private:
 971   // We need to enforce that our value_type is a Plain Old Data Type
 972   // (so we know realloc and memmove will work).  We use traits to
 973   // enforce this.  The following gives a compile-time error if
 974   // is_POD_type is false (which is the default for user types).
 975   //
 976   // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
 977   // and if so tell the compiler via code similar to this:
 978   // template<> struct __type_traits<classname> {
 979   //   typedef __true_type    has_trivial_default_constructor;
 980   //   typedef __true_type    has_trivial_copy_constructor;
 981   //   typedef __true_type    has_trivial_assignment_operator;
 982   //   typedef __true_type    has_trivial_destructor;
 983   //   typedef __true_type    is_POD_type;
 984   // };
 985   //
 986   // If this is part of a hash_map, you need to make sure both the
 987   // Key and Value types are POD types, if they're user-defined.
 988 #ifdef UNDERSTANDS_TYPE_TRAITS
 989   static __true_type * const enforce_pod;
 990 #endif
 991
 992   // The actual data
 993   value_type *group;                            // (small) array of T's
 994   unsigned char bitmap[(GROUP_SIZE-1)/8 + 1];   // fancy math is so we round up
 995   size_type num_buckets;                        // limits GROUP_SIZE to 64K
 996 };
 997
 998 // We need a global swap as well
 999 template <class T, uint16_t GROUP_SIZE>
1000 inline void swap(sparsegroup<T,GROUP_SIZE> &x, sparsegroup<T,GROUP_SIZE> &y) {
1001   x.swap(y);
1002 }
1003
1004 #ifdef UNDERSTANDS_TYPE_TRAITS
1005 template <class T, uint16_t GROUP_SIZE>
1006 __true_type * const sparsegroup<T, GROUP_SIZE>::enforce_pod =
1007 static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
1008 #endif
1009
1010 // ---------------------------------------------------------------------------
1011
1012
1013 template <class T, uint16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE>
1014 class sparsetable {
1015  public:
1016   // Basic types
1017   typedef T value_type;                        // stolen from stl_vector.h
1018   typedef value_type* pointer;
1019   typedef const value_type* const_pointer;
1020   typedef table_iterator<sparsetable<T, GROUP_SIZE> > iterator;
1021   typedef const_table_iterator<sparsetable<T, GROUP_SIZE> > const_iterator;
1022   typedef table_element_adaptor<sparsetable<T, GROUP_SIZE> > element_adaptor;
1023   typedef value_type &reference;
1024   typedef const value_type &const_reference;
1025   typedef size_t size_type;
1026   typedef ptrdiff_t difference_type;
1027   typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
1028   typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
1029
1030   // These are our special iterators, that go over non-empty buckets in a
1031   // table.  These aren't const only because you can change non-empty bcks.
1032   typedef two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
1033      nonempty_iterator;
1034   typedef const_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
1035      const_nonempty_iterator;
1036   typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
1037   typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
1038   // Another special iterator: it frees memory as it iterates (used to resize)
1039   typedef destructive_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
1040      destructive_iterator;
1041
1042   // Iterator functions
1043   iterator begin()                      { return iterator(this, 0); }
1044   const_iterator begin() const          { return const_iterator(this, 0); }
1045   iterator end()                        { return iterator(this, size()); }
1046   const_iterator end() const            { return const_iterator(this, size()); }
1047   reverse_iterator rbegin()             { return reverse_iterator(end()); }
1048   const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
1049   reverse_iterator rend()               { return reverse_iterator(begin()); }
1050   const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
1051
1052   // Versions for our special non-empty iterator
1053   nonempty_iterator nonempty_begin()             {
1054     return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
1055   }
1056   const_nonempty_iterator nonempty_begin() const {
1057     return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin());
1058   }
1059   nonempty_iterator nonempty_end() {
1060     return nonempty_iterator(groups.begin(), groups.end(), groups.end());
1061   }
1062   const_nonempty_iterator nonempty_end() const {
1063     return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
1064   }
1065   reverse_nonempty_iterator nonempty_rbegin() {
1066     return reverse_nonempty_iterator(nonempty_end());
1067   }
1068   const_reverse_nonempty_iterator nonempty_rbegin() const {
1069     return const_reverse_nonempty_iterator(nonempty_end());
1070   }
1071   reverse_nonempty_iterator nonempty_rend() {
1072     return reverse_nonempty_iterator(nonempty_begin());
1073   }
1074   const_reverse_nonempty_iterator nonempty_rend() const {
1075     return const_reverse_nonempty_iterator(nonempty_begin());
1076   }
1077   destructive_iterator destructive_begin() {
1078     return destructive_iterator(groups.begin(), groups.end(), groups.begin());
1079   }
1080   destructive_iterator destructive_end() {
1081     return destructive_iterator(groups.begin(), groups.end(), groups.end());
1082   }
1083
1084  private:
1085   typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::reference
1086     GroupsReference;
1087   typedef typename
1088     vector< sparsegroup<value_type, GROUP_SIZE> >::const_reference
1089     GroupsConstReference;
1090   typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::iterator
1091     GroupsIterator;
1092   typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::const_iterator
1093     GroupsConstIterator;
1094
1095   // How to deal with the proper group
1096   static size_type num_groups(size_type num) {   // how many to hold num buckets
1097     return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1;
1098   }
1099
1100   size_type pos_in_group(size_type i) const { return i % GROUP_SIZE; }
1101   size_type group_num(size_type i) const    { return i / GROUP_SIZE; }
1102   GroupsReference which_group(size_type i)  { return groups[group_num(i)]; }
1103   GroupsConstReference which_group(size_type i) const
1104                                             { return groups[group_num(i)]; }
1105
1106
1107  public:
1108   // Constructors -- default, normal (when you specify size), and copy
1109   sparsetable(size_type size = 0)
1110     : groups(num_groups(size)), table_size(size), num_buckets(0) { }
1111   // We'll can get away with using the default copy constructor,
1112   // and default destructor, and hence the default operator=.  Huzzah!
1113
1114   // Many STL algorithms use swap instead of copy constructors
1115   void swap(sparsetable& x) {
1116     STL_NAMESPACE::swap(groups, x.groups);
1117     STL_NAMESPACE::swap(table_size, x.table_size);
1118     STL_NAMESPACE::swap(num_buckets, x.num_buckets);
1119   }
1120
1121   // It's always nice to be able to clear a table without deallocating it
1122   void clear() {
1123     GroupsIterator group;
1124     for ( group = groups.begin(); group != groups.end(); ++group ) {
1125       group->clear();
1126     }
1127     num_buckets = 0;
1128   }
1129
1130   // Functions that tell you about size.
1131   // NOTE: empty() is non-intuitive!  It does not tell you the number
1132   // of not-empty buckets (use num_nonempty() for that).  Instead
1133   // it says whether you've allocated any buckets or not.
1134   size_type size() const           { return table_size; }
1135   size_type max_size() const       { return size_type(-1); }
1136   bool empty() const               { return table_size == 0; }
1137   // We also may want to know how many *used* buckets there are
1138   size_type num_nonempty() const   { return num_buckets; }
1139
1140   // OK, we'll let you resize one of these puppies
1141   void resize(size_type new_size) {
1142     groups.resize(num_groups(new_size));
1143     if ( new_size < table_size) {   // lower num_buckets, clear last group
1144       if ( new_size % GROUP_SIZE > 0 )       // need to clear from last group
1145         groups.back().erase(groups.back().begin() + (new_size % GROUP_SIZE),
1146                             groups.back().end());
1147       num_buckets = 0;                       // refigure # of used buckets
1148       GroupsConstIterator group;
1149       for ( group = groups.begin(); group != groups.end(); ++group )
1150         num_buckets += group->num_nonempty();
1151     }
1152     table_size = new_size;
1153   }
1154
1155
1156   // We let you see if a bucket is non-empty without retrieving it
1157   bool test(size_type i) const {
1158     return which_group(i).test(pos_in_group(i));
1159   }
1160   bool test(iterator pos) const {
1161     return which_group(pos.pos).test(pos_in_group(pos.pos));
1162   }
1163   bool test(const_iterator pos) const {
1164     return which_group(pos.pos).test(pos_in_group(pos.pos));
1165   }
1166
1167   // We only return const_references because it's really hard to
1168   // return something settable for empty buckets.  Use set() instead.
1169   const_reference get(size_type i) const {
1170     assert(i < table_size);
1171     return which_group(i).get(pos_in_group(i));
1172   }
1173
1174   // TODO(csilvers): make protected + friend element_adaptor
1175   reference mutating_get(size_type i) {    // fills bucket i before getting
1176     assert(i < table_size);
1177     size_type old_numbuckets = which_group(i).num_nonempty();
1178     reference retval = which_group(i).mutating_get(pos_in_group(i));
1179     num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1180     return retval;
1181   }
1182
1183   // Syntactic sugar.  As in sparsegroup, the non-const version is harder
1184   const_reference operator[](size_type i) const {
1185     return get(i);
1186   }
1187
1188   element_adaptor operator[](size_type i) {
1189     return element_adaptor(this, i);
1190   }
1191
1192   // Needed for hashtables, gets as a nonempty_iterator.  Crashes for empty bcks
1193   const_nonempty_iterator get_iter(size_type i) const {
1194     assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
1195     return const_nonempty_iterator(
1196       groups.begin(), groups.end(),
1197       groups.begin() + group_num(i),
1198       (groups[group_num(i)].nonempty_begin() +
1199        groups[group_num(i)].pos_to_offset(pos_in_group(i))));
1200   }
1201   // For nonempty we can return a non-const version
1202   nonempty_iterator get_iter(size_type i) {
1203     assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
1204     return nonempty_iterator(
1205       groups.begin(), groups.end(),
1206       groups.begin() + group_num(i),
1207       (groups[group_num(i)].nonempty_begin() +
1208        groups[group_num(i)].pos_to_offset(pos_in_group(i))));
1209   }
1210
1211
1212   // This returns a reference to the inserted item (which is a copy of val)
1213   // The trick is to figure out whether we're replacing or inserting anew
1214   reference set(size_type i, const_reference val) {
1215     assert(i < table_size);
1216     size_type old_numbuckets = which_group(i).num_nonempty();
1217     reference retval = which_group(i).set(pos_in_group(i), val);
1218     num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1219     return retval;
1220   }
1221
1222   // This takes the specified elements out of the table.  This is
1223   // "undefining", rather than "clearing".
1224   void erase(size_type i) {
1225     assert(i < table_size);
1226     size_type old_numbuckets = which_group(i).num_nonempty();
1227     which_group(i).erase(pos_in_group(i));
1228     num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1229   }
1230
1231   void erase(iterator pos) {
1232     erase(pos.pos);
1233   }
1234
1235   void erase(iterator start, iterator end) {
1236     // This could be more efficient, but then we'd need to figure
1237     // out if we spanned groups or not.  Doesn't seem worth it.
1238     for ( ; start != end; ++start )
1239       erase(start);
1240   }
1241
1242
1243   // We support reading and writing tables to disk.  We don't store
1244   // the actual array contents (which we don't know how to store),
1245   // just the groups and sizes.  Returns true if all went ok.
1246
1247  private:
1248   // Every time the disk format changes, this should probably change too
1249   static const unsigned long MAGIC_NUMBER = 0x24687531;
1250
1251   // Old versions of this code write all data in 32 bits.  We need to
1252   // support these files as well as having support for 64-bit systems.
1253   // So we use the following encoding scheme: for values < 2^32-1, we
1254   // store in 4 bytes in big-endian order.  For values > 2^32, we
1255   // store 0xFFFFFFF followed by 8 bytes in big-endian order.  This
1256   // causes us to mis-read old-version code that stores exactly
1257   // 0xFFFFFFF, but I don't think that is likely to have happened for
1258   // these particular values.
1259   static bool write_32_or_64(FILE* fp, size_type value) {
1260     if ( value < 0xFFFFFFFFULL ) {        // fits in 4 bytes
1261       PUT_(value, 24);
1262       PUT_(value, 16);
1263       PUT_(value, 8);
1264       PUT_(value, 0);
1265     } else if ( value == 0xFFFFFFFFUL ) {   // special case in 32bit systems
1266       PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
1267       PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); PUT_(0, 0);
1268       PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);
1269     } else {
1270       PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
1271       PUT_(value, 56);
1272       PUT_(value, 48);
1273       PUT_(value, 40);
1274       PUT_(value, 32);
1275       PUT_(value, 24);
1276       PUT_(value, 16);
1277       PUT_(value, 8);
1278       PUT_(value, 0);
1279     }
1280     return true;
1281   }
1282
1283   static bool read_32_or_64(FILE* fp, size_type *value) {  // reads into value
1284     size_type first4 = 0;
1285     int x;
1286     GET_(first4, 24);
1287     GET_(first4, 16);
1288     GET_(first4, 8);
1289     GET_(first4, 0);
1290     if ( first4 < 0xFFFFFFFFULL ) {
1291       *value = first4;
1292     } else {
1293       GET_(*value, 56);
1294       GET_(*value, 48);
1295       GET_(*value, 40);
1296       GET_(*value, 32);
1297       GET_(*value, 24);
1298       GET_(*value, 16);
1299       GET_(*value, 8);
1300       GET_(*value, 0);
1301     }
1302     return true;
1303   }
1304
1305  public:
1306   bool write_metadata(FILE *fp) const {
1307     if ( !write_32_or_64(fp, MAGIC_NUMBER) )  return false;
1308     if ( !write_32_or_64(fp, table_size) )  return false;
1309     if ( !write_32_or_64(fp, num_buckets) )  return false;
1310
1311     GroupsConstIterator group;
1312     for ( group = groups.begin(); group != groups.end(); ++group )
1313       if ( group->write_metadata(fp) == false )  return false;
1314     return true;
1315   }
1316
1317   // Reading destroys the old table contents!  Returns true if read ok.
1318   bool read_metadata(FILE *fp) {
1319     size_type magic_read = 0;
1320     if ( !read_32_or_64(fp, &magic_read) )  return false;
1321     if ( magic_read != MAGIC_NUMBER ) {
1322       clear();                        // just to be consistent
1323       return false;
1324     }
1325
1326     if ( !read_32_or_64(fp, &table_size) )  return false;
1327     if ( !read_32_or_64(fp, &num_buckets) )  return false;
1328
1329     resize(table_size);                            // so the vector's sized ok
1330     GroupsIterator group;
1331     for ( group = groups.begin(); group != groups.end(); ++group )
1332       if ( group->read_metadata(fp) == false )  return false;
1333     return true;
1334   }
1335
1336   // This code is identical to that for SparseGroup
1337   // If your keys and values are simple enough, we can write them
1338   // to disk for you.  "simple enough" means no pointers.
1339   // However, we don't try to normalize endianness
1340   bool write_nopointer_data(FILE *fp) const {
1341     for ( const_nonempty_iterator it = nonempty_begin();
1342           it != nonempty_end(); ++it ) {
1343       if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
1344     }
1345     return true;
1346   }
1347
1348   // When reading, we have to override the potential const-ness of *it
1349   bool read_nopointer_data(FILE *fp) {
1350     for ( nonempty_iterator it = nonempty_begin();
1351           it != nonempty_end(); ++it ) {
1352       if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
1353         return false;
1354     }
1355     return true;
1356   }
1357
1358   // Comparisons.  Note the comparisons are pretty arbitrary: we
1359   // compare values of the first index that isn't equal (using default
1360   // value for empty buckets).
1361   bool operator==(const sparsetable& x) const {
1362     return ( table_size == x.table_size &&
1363              num_buckets == x.num_buckets &&
1364              groups == x.groups );
1365   }
1366   bool operator<(const sparsetable& x) const {      // also from algobase.h
1367     return STL_NAMESPACE::lexicographical_compare(begin(), end(),
1368                                                   x.begin(), x.end());
1369   }
1370   bool operator!=(const sparsetable& x) const { return !(*this == x); }
1371   bool operator<=(const sparsetable& x) const { return *this < x || *this == x; }
1372   bool operator>(const sparsetable& x) const { return x <= *this; }
1373   bool operator>=(const sparsetable& x) const { return x < *this; }
1374
1375
1376  private:
1377   // The actual data
1378   vector< sparsegroup<value_type, GROUP_SIZE> > groups;  // our list of groups
1379   size_type table_size;                         // how many buckets they want
1380   size_type num_buckets;                        // number of non-empty buckets
1381 };
1382
1383 // We need a global swap as well
1384 template <class T, uint16_t GROUP_SIZE>
1385 inline void swap(sparsetable<T,GROUP_SIZE> &x, sparsetable<T,GROUP_SIZE> &y) {
1386   x.swap(y);
1387 }
1388
1389 #undef GET_
1390 #undef PUT_
1391
1392 _END_GOOGLE_NAMESPACE_
1393
1394 #endif