src/include/lib/simplehash.h

   1 /*
   2  * simplehash.h
   3  *
   4  *        When included this file generates a "templated" (by way of macros)
   5  *        open-addressing hash table implementation specialized to user-defined
   6  *        types.
   7  *
   8  *        It's probably not worthwhile to generate such a specialized implementation
   9  *        for hash tables that aren't performance or space sensitive.
  10  *
  11  *        Compared to dynahash, simplehash has the following benefits:
  12  *
   13  *        - Due to the "templated" code generation it has known structure sizes and no
  14  *          indirect function calls (which show up substantially in dynahash
  15  *          profiles). These features considerably increase speed for small
  16  *          entries.
  17  *        - Open addressing has better CPU cache behavior than dynahash's chained
  18  *          hashtables.
  19  *        - The generated interface is type-safe and easier to use than dynahash,
  20  *          though at the cost of more complex setup.
  21  *        - Allocates memory in a MemoryContext or another allocator with a
  22  *          malloc/free style interface (which isn't easily usable in a shared
  23  *          memory context)
  24  *        - Does not require the overhead of a separate memory context.
  25  *
  26  * Usage notes:
  27  *
  28  *        To generate a hash-table and associated functions for a use case several
  29  *        macros have to be #define'ed before this file is included.  Including
  30  *        the file #undef's all those, so a new hash table can be generated
  31  *        afterwards.
  32  *        The relevant parameters are:
  33  *        - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
  34  *              will result in hash table type 'foo_hash' and functions like
  35  *              'foo_insert'/'foo_lookup' and so forth.
  36  *        - SH_ELEMENT_TYPE - type of the contained elements
  37  *        - SH_KEY_TYPE - type of the hashtable's key
  38  *        - SH_DECLARE - if defined function prototypes and type declarations are
  39  *              generated
  40  *        - SH_DEFINE - if defined function definitions are generated
  41  *        - SH_SCOPE - in which scope (e.g. extern, static inline) do function
  42  *              declarations reside
   43  *        - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
   44  *              use this to allocate bytes (the returned memory must be zeroed)
  45  *        - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
  46  *              are defined, so you can supply your own
  47  *        The following parameters are only relevant when SH_DEFINE is defined:
  48  *        - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
  49  *        - SH_EQUAL(table, a, b) - compare two table keys
  50  *        - SH_HASH_KEY(table, key) - generate hash for the key
  51  *        - SH_STORE_HASH - if defined the hash is stored in the elements
  52  *        - SH_GET_HASH(tb, a) - return the field to store the hash in
  53  *
  54  *        The element type is required to contain a "status" member that can store
  55  *        the range of values defined in the SH_STATUS enum.
  56  *
  57  *        While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
  58  *        the hash table implementation needs to compare hashes to move elements
  59  *        (particularly when growing the hash), it's preferable, if possible, to
  60  *        store the element's hash in the element's data type. If the hash is so
  61  *        stored, the hash table will also compare hashes before calling SH_EQUAL
  62  *        when comparing two keys.
  63  *
  64  *        For convenience the hash table create functions accept a void pointer
  65  *        that will be stored in the hash table type's member private_data. This
  66  *        allows callbacks to reference caller provided data.
  67  *
  68  *        For examples of usage look at tidbitmap.c (file local definition) and
  69  *        execnodes.h/execGrouping.c (exposed declaration, file local
  70  *        implementation).
  71  *
  72  * Hash table design:
  73  *
  74  *        The hash table design chosen is a variant of linear open-addressing. The
  75  *        reason for doing so is that linear addressing is CPU cache & pipeline
  76  *        friendly. The biggest disadvantage of simple linear addressing schemes
  77  *        are highly variable lookup times due to clustering, and deletions
  78  *        leaving a lot of tombstones around.  To address these issues a variant
  79  *        of "robin hood" hashing is employed.  Robin hood hashing optimizes
  80  *        chaining lengths by moving elements close to their optimal bucket
  81  *        ("rich" elements), out of the way if a to-be-inserted element is further
  82  *        away from its optimal position (i.e. it's "poor").  While that can make
  83  *        insertions slower, the average lookup performance is a lot better, and
  84  *        higher fill factors can be used in a still performant manner.  To avoid
  85  *        tombstones - which normally solve the issue that a deleted node's
  86  *        presence is relevant to determine whether a lookup needs to continue
  87  *        looking or is done - buckets following a deleted element are shifted
  88  *        backwards, unless they're empty or already at their optimal position.
  89  */
  90
#include <stdlib.h>

#include "port/pg_bitutils.h"
  92
  93 /* helpers */
  94 #define SH_MAKE_PREFIX(a) CppConcat(a,_)
  95 #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
  96 #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
  97
  98 /* name macros for: */
  99
 100 /* type declarations */
 101 #define SH_TYPE SH_MAKE_NAME(hash)
 102 #define SH_STATUS SH_MAKE_NAME(status)
 103 #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
 104 #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
 105 #define SH_ITERATOR SH_MAKE_NAME(iterator)
 106
 107 /* function declarations */
 108 #define SH_CREATE SH_MAKE_NAME(create)
 109 #define SH_DESTROY SH_MAKE_NAME(destroy)
 110 #define SH_RESET SH_MAKE_NAME(reset)
 111 #define SH_INSERT SH_MAKE_NAME(insert)
 112 #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
 113 #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
 114 #define SH_DELETE SH_MAKE_NAME(delete)
 115 #define SH_LOOKUP SH_MAKE_NAME(lookup)
 116 #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
 117 #define SH_GROW SH_MAKE_NAME(grow)
 118 #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
 119 #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
 120 #define SH_ITERATE SH_MAKE_NAME(iterate)
 121 #define SH_ALLOCATE SH_MAKE_NAME(allocate)
 122 #define SH_FREE SH_MAKE_NAME(free)
 123 #define SH_STAT SH_MAKE_NAME(stat)
 124
 125 /* internal helper functions (no externally visible prototypes) */
 126 #define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
 127 #define SH_NEXT SH_MAKE_NAME(next)
 128 #define SH_PREV SH_MAKE_NAME(prev)
 129 #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
 130 #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
 131 #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
 132 #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
 133 #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
 134
 135 /* generate forward declarations necessary to use the hash table */
 136 #ifdef SH_DECLARE
 137
 138 /* type definitions */
 139 typedef struct SH_TYPE
 140 {
 141         /*
 142          * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
 143          * tables.  Note that the maximum number of elements is lower
 144          * (SH_MAX_FILLFACTOR)
 145          */
 146         uint64          size;
 147
 148         /* how many elements have valid contents */
 149         uint32          members;
 150
 151         /* mask for bucket and size calculations, based on size */
 152         uint32          sizemask;
 153
 154         /* boundary after which to grow hashtable */
 155         uint32          grow_threshold;
 156
 157         /* hash buckets */
 158         SH_ELEMENT_TYPE *data;
 159
 160 #ifndef SH_RAW_ALLOCATOR
 161         /* memory context to use for allocations */
 162         MemoryContext ctx;
 163 #endif
 164
 165         /* user defined data, useful for callbacks */
 166         void       *private_data;
 167 }                       SH_TYPE;
 168
 169 typedef enum SH_STATUS
 170 {
 171         SH_STATUS_EMPTY = 0x00,
 172         SH_STATUS_IN_USE = 0x01
 173 } SH_STATUS;
 174
 175 typedef struct SH_ITERATOR
 176 {
 177         uint32          cur;                    /* current element */
 178         uint32          end;
 179         bool            done;                   /* iterator exhausted? */
 180 }                       SH_ITERATOR;
 181
 182 /* externally visible function prototypes */
 183 #ifdef SH_RAW_ALLOCATOR
 184 /* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
 185 SH_SCOPE        SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
 186 #else
 187 /*
 188  * <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
 189  *                                                               void *private_data)
 190  */
 191 SH_SCOPE        SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
 192                                                            void *private_data);
 193 #endif
 194
 195 /* void <prefix>_destroy(<prefix>_hash *tb) */
 196 SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
 197
 198 /* void <prefix>_reset(<prefix>_hash *tb) */
 199 SH_SCOPE void SH_RESET(SH_TYPE * tb);
 200
 201 /* void <prefix>_grow(<prefix>_hash *tb, uint64 newsize) */
 202 SH_SCOPE void SH_GROW(SH_TYPE * tb, uint64 newsize);
 203
 204 /* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
 205 SH_SCOPE        SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
 206
 207 /*
 208  * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
 209  *                                                                bool *found)
 210  */
 211 SH_SCOPE        SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
 212                                                                                         uint32 hash, bool *found);
 213
 214 /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
 215 SH_SCOPE        SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
 216
 217 /* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
 218 SH_SCOPE        SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
 219                                                                                         uint32 hash);
 220
 221 /* void <prefix>_delete_item(<prefix>_hash *tb, <element> *entry) */
 222 SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry);
 223
 224 /* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
 225 SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
 226
 227 /* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
 228 SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
 229
 230 /*
 231  * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
 232  *                                                                uint32 at)
 233  */
 234 SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
 235
 236 /* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
 237 SH_SCOPE        SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
 238
 239 /* void <prefix>_stat(<prefix>_hash *tb */
 240 SH_SCOPE void SH_STAT(SH_TYPE * tb);
 241
 242 #endif                                                  /* SH_DECLARE */
 243
 244
 245 /* generate implementation of the hash table */
 246 #ifdef SH_DEFINE
 247
 248 #ifndef SH_RAW_ALLOCATOR
 249 #include "utils/memutils.h"
 250 #endif
 251
 252 /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
 253 #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
 254
 255 /* normal fillfactor, unless already close to maximum */
 256 #ifndef SH_FILLFACTOR
 257 #define SH_FILLFACTOR (0.9)
 258 #endif
 259 /* increase fillfactor if we otherwise would error out */
 260 #define SH_MAX_FILLFACTOR (0.98)
 261 /* grow if actual and optimal location bigger than */
 262 #ifndef SH_GROW_MAX_DIB
 263 #define SH_GROW_MAX_DIB 25
 264 #endif
 265 /* grow if more than elements to move when inserting */
 266 #ifndef SH_GROW_MAX_MOVE
 267 #define SH_GROW_MAX_MOVE 150
 268 #endif
 269 #ifndef SH_GROW_MIN_FILLFACTOR
 270 /* but do not grow due to SH_GROW_MAX_* if below */
 271 #define SH_GROW_MIN_FILLFACTOR 0.1
 272 #endif
 273
 274 #ifdef SH_STORE_HASH
 275 #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
 276 #else
 277 #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
 278 #endif
 279
 280 /*
 281  * Wrap the following definitions in include guards, to avoid multiple
 282  * definition errors if this header is included more than once.  The rest of
 283  * the file deliberately has no include guards, because it can be included
 284  * with different parameters to define functions and types with non-colliding
 285  * names.
 286  */
 287 #ifndef SIMPLEHASH_H
 288 #define SIMPLEHASH_H
 289
 290 #ifdef FRONTEND
 291 #define sh_error(...) pg_log_error(__VA_ARGS__)
 292 #define sh_log(...) pg_log_info(__VA_ARGS__)
 293 #else
 294 #define sh_error(...) elog(ERROR, __VA_ARGS__)
 295 #define sh_log(...) elog(LOG, __VA_ARGS__)
 296 #endif
 297
 298 #endif
 299
 300 /*
 301  * Compute sizing parameters for hashtable. Called when creating and growing
 302  * the hashtable.
 303  */
 304 static inline void
 305 SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint64 newsize)
 306 {
 307         uint64          size;
 308
 309         /* supporting zero sized hashes would complicate matters */
 310         size = Max(newsize, 2);
 311
 312         /* round up size to the next power of 2, that's how bucketing works */
 313         size = pg_nextpower2_64(size);
 314         Assert(size <= SH_MAX_SIZE);
 315
 316         /*
 317          * Verify that allocation of ->data is possible on this platform, without
 318          * overflowing Size.
 319          */
 320         if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2)
 321                 sh_error("hash table too large");
 322
 323         /* now set size */
 324         tb->size = size;
 325         tb->sizemask = (uint32) (size - 1);
 326
 327         /*
 328          * Compute the next threshold at which we need to grow the hash table
 329          * again.
 330          */
 331         if (tb->size == SH_MAX_SIZE)
 332                 tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
 333         else
 334                 tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
 335 }
 336
 337 /* return the optimal bucket for the hash */
 338 static inline uint32
 339 SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
 340 {
 341         return hash & tb->sizemask;
 342 }
 343
 344 /* return next bucket after the current, handling wraparound */
 345 static inline uint32
 346 SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
 347 {
 348         curelem = (curelem + 1) & tb->sizemask;
 349
 350         Assert(curelem != startelem);
 351
 352         return curelem;
 353 }
 354
 355 /* return bucket before the current, handling wraparound */
 356 static inline uint32
 357 SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
 358 {
 359         curelem = (curelem - 1) & tb->sizemask;
 360
 361         Assert(curelem != startelem);
 362
 363         return curelem;
 364 }
 365
 366 /* return distance between bucket and its optimal position */
 367 static inline uint32
 368 SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
 369 {
 370         if (optimal <= bucket)
 371                 return bucket - optimal;
 372         else
 373                 return (tb->size + bucket) - optimal;
 374 }
 375
 376 static inline uint32
 377 SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
 378 {
 379 #ifdef SH_STORE_HASH
 380         return SH_GET_HASH(tb, entry);
 381 #else
 382         return SH_HASH_KEY(tb, entry->SH_KEY);
 383 #endif
 384 }
 385
 386 /* default memory allocator function */
 387 static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
 388 static inline void SH_FREE(SH_TYPE * type, void *pointer);
 389
 390 #ifndef SH_USE_NONDEFAULT_ALLOCATOR
 391
 392 /* default memory allocator function */
 393 static inline void *
 394 SH_ALLOCATE(SH_TYPE * type, Size size)
 395 {
 396 #ifdef SH_RAW_ALLOCATOR
 397         return SH_RAW_ALLOCATOR(size);
 398 #else
 399         return MemoryContextAllocExtended(type->ctx, size,
 400                                                                           MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
 401 #endif
 402 }
 403
 404 /* default memory free function */
 405 static inline void
 406 SH_FREE(SH_TYPE * type, void *pointer)
 407 {
 408         pfree(pointer);
 409 }
 410
 411 #endif
 412
 413 /*
 414  * Create a hash table with enough space for `nelements` distinct members.
 415  * Memory for the hash table is allocated from the passed-in context.  If
 416  * desired, the array of elements can be allocated using a passed-in allocator;
 417  * this could be useful in order to place the array of elements in a shared
 418  * memory, or in a context that will outlive the rest of the hash table.
 419  * Memory other than for the array of elements will still be allocated from
 420  * the passed-in context.
 421  */
 422 #ifdef SH_RAW_ALLOCATOR
 423 SH_SCOPE        SH_TYPE *
 424 SH_CREATE(uint32 nelements, void *private_data)
 425 #else
 426 SH_SCOPE        SH_TYPE *
 427 SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
 428 #endif
 429 {
 430         SH_TYPE    *tb;
 431         uint64          size;
 432
 433 #ifdef SH_RAW_ALLOCATOR
 434         tb = SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
 435 #else
 436         tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
 437         tb->ctx = ctx;
 438 #endif
 439         tb->private_data = private_data;
 440
 441         /* increase nelements by fillfactor, want to store nelements elements */
 442         size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
 443
 444         SH_COMPUTE_PARAMETERS(tb, size);
 445
 446         tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
 447
 448         return tb;
 449 }
 450
 451 /* destroy a previously created hash table */
 452 SH_SCOPE void
 453 SH_DESTROY(SH_TYPE * tb)
 454 {
 455         SH_FREE(tb, tb->data);
 456         pfree(tb);
 457 }
 458
 459 /* reset the contents of a previously created hash table */
 460 SH_SCOPE void
 461 SH_RESET(SH_TYPE * tb)
 462 {
 463         memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
 464         tb->members = 0;
 465 }
 466
 467 /*
 468  * Grow a hash table to at least `newsize` buckets.
 469  *
 470  * Usually this will automatically be called by insertions/deletions, when
 471  * necessary. But resizing to the exact input size can be advantageous
 472  * performance-wise, when known at some point.
 473  */
 474 SH_SCOPE void
 475 SH_GROW(SH_TYPE * tb, uint64 newsize)
 476 {
 477         uint64          oldsize = tb->size;
 478         SH_ELEMENT_TYPE *olddata = tb->data;
 479         SH_ELEMENT_TYPE *newdata;
 480         uint32          i;
 481         uint32          startelem = 0;
 482         uint32          copyelem;
 483
 484         Assert(oldsize == pg_nextpower2_64(oldsize));
 485         Assert(oldsize != SH_MAX_SIZE);
 486         Assert(oldsize < newsize);
 487
 488         /* compute parameters for new table */
 489         SH_COMPUTE_PARAMETERS(tb, newsize);
 490
 491         tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
 492
 493         newdata = tb->data;
 494
 495         /*
 496          * Copy entries from the old data to newdata. We theoretically could use
 497          * SH_INSERT here, to avoid code duplication, but that's more general than
 498          * we need. We neither want tb->members increased, nor do we need to do
 499          * deal with deleted elements, nor do we need to compare keys. So a
 500          * special-cased implementation is lot faster. As resizing can be time
 501          * consuming and frequent, that's worthwhile to optimize.
 502          *
 503          * To be able to simply move entries over, we have to start not at the
 504          * first bucket (i.e olddata[0]), but find the first bucket that's either
 505          * empty, or is occupied by an entry at its optimal position. Such a
 506          * bucket has to exist in any table with a load factor under 1, as not all
 507          * buckets are occupied, i.e. there always has to be an empty bucket.  By
 508          * starting at such a bucket we can move the entries to the larger table,
 509          * without having to deal with conflicts.
 510          */
 511
 512         /* search for the first element in the hash that's not wrapped around */
 513         for (i = 0; i < oldsize; i++)
 514         {
 515                 SH_ELEMENT_TYPE *oldentry = &olddata[i];
 516                 uint32          hash;
 517                 uint32          optimal;
 518
 519                 if (oldentry->status != SH_STATUS_IN_USE)
 520                 {
 521                         startelem = i;
 522                         break;
 523                 }
 524
 525                 hash = SH_ENTRY_HASH(tb, oldentry);
 526                 optimal = SH_INITIAL_BUCKET(tb, hash);
 527
 528                 if (optimal == i)
 529                 {
 530                         startelem = i;
 531                         break;
 532                 }
 533         }
 534
 535         /* and copy all elements in the old table */
 536         copyelem = startelem;
 537         for (i = 0; i < oldsize; i++)
 538         {
 539                 SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
 540
 541                 if (oldentry->status == SH_STATUS_IN_USE)
 542                 {
 543                         uint32          hash;
 544                         uint32          startelem;
 545                         uint32          curelem;
 546                         SH_ELEMENT_TYPE *newentry;
 547
 548                         hash = SH_ENTRY_HASH(tb, oldentry);
 549                         startelem = SH_INITIAL_BUCKET(tb, hash);
 550                         curelem = startelem;
 551
 552                         /* find empty element to put data into */
 553                         while (true)
 554                         {
 555                                 newentry = &newdata[curelem];
 556
 557                                 if (newentry->status == SH_STATUS_EMPTY)
 558                                 {
 559                                         break;
 560                                 }
 561
 562                                 curelem = SH_NEXT(tb, curelem, startelem);
 563                         }
 564
 565                         /* copy entry to new slot */
 566                         memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
 567                 }
 568
 569                 /* can't use SH_NEXT here, would use new size */
 570                 copyelem++;
 571                 if (copyelem >= oldsize)
 572                 {
 573                         copyelem = 0;
 574                 }
 575         }
 576
 577         SH_FREE(tb, olddata);
 578 }
 579
 580 /*
 581  * This is a separate static inline function, so it can be reliably be inlined
 582  * into its wrapper functions even if SH_SCOPE is extern.
 583  */
 584 static inline SH_ELEMENT_TYPE *
 585 SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
 586 {
 587         uint32          startelem;
 588         uint32          curelem;
 589         SH_ELEMENT_TYPE *data;
 590         uint32          insertdist;
 591
 592 restart:
 593         insertdist = 0;
 594
 595         /*
 596          * We do the grow check even if the key is actually present, to avoid
 597          * doing the check inside the loop. This also lets us avoid having to
 598          * re-find our position in the hashtable after resizing.
 599          *
 600          * Note that this also reached when resizing the table due to
 601          * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
 602          */
 603         if (unlikely(tb->members >= tb->grow_threshold))
 604         {
 605                 if (tb->size == SH_MAX_SIZE)
 606                 {
 607                         sh_error("hash table size exceeded");
 608                 }
 609
 610                 /*
 611                  * When optimizing, it can be very useful to print these out.
 612                  */
 613                 /* SH_STAT(tb); */
 614                 SH_GROW(tb, tb->size * 2);
 615                 /* SH_STAT(tb); */
 616         }
 617
 618         /* perform insert, start bucket search at optimal location */
 619         data = tb->data;
 620         startelem = SH_INITIAL_BUCKET(tb, hash);
 621         curelem = startelem;
 622         while (true)
 623         {
 624                 uint32          curdist;
 625                 uint32          curhash;
 626                 uint32          curoptimal;
 627                 SH_ELEMENT_TYPE *entry = &data[curelem];
 628
 629                 /* any empty bucket can directly be used */
 630                 if (entry->status == SH_STATUS_EMPTY)
 631                 {
 632                         tb->members++;
 633                         entry->SH_KEY = key;
 634 #ifdef SH_STORE_HASH
 635                         SH_GET_HASH(tb, entry) = hash;
 636 #endif
 637                         entry->status = SH_STATUS_IN_USE;
 638                         *found = false;
 639                         return entry;
 640                 }
 641
 642                 /*
 643                  * If the bucket is not empty, we either found a match (in which case
 644                  * we're done), or we have to decide whether to skip over or move the
 645                  * colliding entry. When the colliding element's distance to its
 646                  * optimal position is smaller than the to-be-inserted entry's, we
 647                  * shift the colliding entry (and its followers) forward by one.
 648                  */
 649
 650                 if (SH_COMPARE_KEYS(tb, hash, key, entry))
 651                 {
 652                         Assert(entry->status == SH_STATUS_IN_USE);
 653                         *found = true;
 654                         return entry;
 655                 }
 656
 657                 curhash = SH_ENTRY_HASH(tb, entry);
 658                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
 659                 curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
 660
 661                 if (insertdist > curdist)
 662                 {
 663                         SH_ELEMENT_TYPE *lastentry = entry;
 664                         uint32          emptyelem = curelem;
 665                         uint32          moveelem;
 666                         int32           emptydist = 0;
 667
 668                         /* find next empty bucket */
 669                         while (true)
 670                         {
 671                                 SH_ELEMENT_TYPE *emptyentry;
 672
 673                                 emptyelem = SH_NEXT(tb, emptyelem, startelem);
 674                                 emptyentry = &data[emptyelem];
 675
 676                                 if (emptyentry->status == SH_STATUS_EMPTY)
 677                                 {
 678                                         lastentry = emptyentry;
 679                                         break;
 680                                 }
 681
 682                                 /*
 683                                  * To avoid negative consequences from overly imbalanced
 684                                  * hashtables, grow the hashtable if collisions would require
 685                                  * us to move a lot of entries.  The most likely cause of such
 686                                  * imbalance is filling a (currently) small table, from a
 687                                  * currently big one, in hash-table order.  Don't grow if the
 688                                  * hashtable would be too empty, to prevent quick space
 689                                  * explosion for some weird edge cases.
 690                                  */
 691                                 if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
 692                                         ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
 693                                 {
 694                                         tb->grow_threshold = 0;
 695                                         goto restart;
 696                                 }
 697                         }
 698
 699                         /* shift forward, starting at last occupied element */
 700
 701                         /*
 702                          * TODO: This could be optimized to be one memcpy in many cases,
 703                          * excepting wrapping around at the end of ->data. Hasn't shown up
 704                          * in profiles so far though.
 705                          */
 706                         moveelem = emptyelem;
 707                         while (moveelem != curelem)
 708                         {
 709                                 SH_ELEMENT_TYPE *moveentry;
 710
 711                                 moveelem = SH_PREV(tb, moveelem, startelem);
 712                                 moveentry = &data[moveelem];
 713
 714                                 memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
 715                                 lastentry = moveentry;
 716                         }
 717
 718                         /* and fill the now empty spot */
 719                         tb->members++;
 720
 721                         entry->SH_KEY = key;
 722 #ifdef SH_STORE_HASH
 723                         SH_GET_HASH(tb, entry) = hash;
 724 #endif
 725                         entry->status = SH_STATUS_IN_USE;
 726                         *found = false;
 727                         return entry;
 728                 }
 729
 730                 curelem = SH_NEXT(tb, curelem, startelem);
 731                 insertdist++;
 732
 733                 /*
 734                  * To avoid negative consequences from overly imbalanced hashtables,
 735                  * grow the hashtable if collisions lead to large runs. The most
 736                  * likely cause of such imbalance is filling a (currently) small
 737                  * table, from a currently big one, in hash-table order.  Don't grow
 738                  * if the hashtable would be too empty, to prevent quick space
 739                  * explosion for some weird edge cases.
 740                  */
 741                 if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
 742                         ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
 743                 {
 744                         tb->grow_threshold = 0;
 745                         goto restart;
 746                 }
 747         }
 748 }
 749
 750 /*
 751  * Insert the key key into the hash-table, set *found to true if the key
 752  * already exists, false otherwise. Returns the hash-table entry in either
 753  * case.
 754  */
 755 SH_SCOPE        SH_ELEMENT_TYPE *
 756 SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
 757 {
 758         uint32          hash = SH_HASH_KEY(tb, key);
 759
 760         return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
 761 }
 762
 763 /*
 764  * Insert the key key into the hash-table using an already-calculated
 765  * hash. Set *found to true if the key already exists, false
 766  * otherwise. Returns the hash-table entry in either case.
 767  */
 768 SH_SCOPE        SH_ELEMENT_TYPE *
 769 SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
 770 {
 771         return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
 772 }
 773
 774 /*
 775  * This is a separate static inline function, so it can be reliably be inlined
 776  * into its wrapper functions even if SH_SCOPE is extern.
 777  */
 778 static inline SH_ELEMENT_TYPE *
 779 SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
 780 {
 781         const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
 782         uint32          curelem = startelem;
 783
 784         while (true)
 785         {
 786                 SH_ELEMENT_TYPE *entry = &tb->data[curelem];
 787
 788                 if (entry->status == SH_STATUS_EMPTY)
 789                 {
 790                         return NULL;
 791                 }
 792
 793                 Assert(entry->status == SH_STATUS_IN_USE);
 794
 795                 if (SH_COMPARE_KEYS(tb, hash, key, entry))
 796                         return entry;
 797
 798                 /*
 799                  * TODO: we could stop search based on distance. If the current
 800                  * buckets's distance-from-optimal is smaller than what we've skipped
 801                  * already, the entry doesn't exist. Probably only do so if
 802                  * SH_STORE_HASH is defined, to avoid re-computing hashes?
 803                  */
 804
 805                 curelem = SH_NEXT(tb, curelem, startelem);
 806         }
 807 }
 808
 809 /*
 810  * Lookup up entry in hash table.  Returns NULL if key not present.
 811  */
 812 SH_SCOPE        SH_ELEMENT_TYPE *
 813 SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
 814 {
 815         uint32          hash = SH_HASH_KEY(tb, key);
 816
 817         return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
 818 }
 819
 820 /*
 821  * Lookup up entry in hash table using an already-calculated hash.
 822  *
 823  * Returns NULL if key not present.
 824  */
 825 SH_SCOPE        SH_ELEMENT_TYPE *
 826 SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
 827 {
 828         return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
 829 }
 830
 831 /*
 832  * Delete entry from hash table by key.  Returns whether to-be-deleted key was
 833  * present.
 834  */
 835 SH_SCOPE bool
 836 SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
 837 {
 838         uint32          hash = SH_HASH_KEY(tb, key);
 839         uint32          startelem = SH_INITIAL_BUCKET(tb, hash);
 840         uint32          curelem = startelem;
 841
 842         while (true)
 843         {
 844                 SH_ELEMENT_TYPE *entry = &tb->data[curelem];
 845
 846                 if (entry->status == SH_STATUS_EMPTY)
 847                         return false;
 848
 849                 if (entry->status == SH_STATUS_IN_USE &&
 850                         SH_COMPARE_KEYS(tb, hash, key, entry))
 851                 {
 852                         SH_ELEMENT_TYPE *lastentry = entry;
 853
 854                         tb->members--;
 855
 856                         /*
 857                          * Backward shift following elements till either an empty element
 858                          * or an element at its optimal position is encountered.
 859                          *
 860                          * While that sounds expensive, the average chain length is short,
 861                          * and deletions would otherwise require tombstones.
 862                          */
 863                         while (true)
 864                         {
 865                                 SH_ELEMENT_TYPE *curentry;
 866                                 uint32          curhash;
 867                                 uint32          curoptimal;
 868
 869                                 curelem = SH_NEXT(tb, curelem, startelem);
 870                                 curentry = &tb->data[curelem];
 871
 872                                 if (curentry->status != SH_STATUS_IN_USE)
 873                                 {
 874                                         lastentry->status = SH_STATUS_EMPTY;
 875                                         break;
 876                                 }
 877
 878                                 curhash = SH_ENTRY_HASH(tb, curentry);
 879                                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
 880
 881                                 /* current is at optimal position, done */
 882                                 if (curoptimal == curelem)
 883                                 {
 884                                         lastentry->status = SH_STATUS_EMPTY;
 885                                         break;
 886                                 }
 887
 888                                 /* shift */
 889                                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
 890
 891                                 lastentry = curentry;
 892                         }
 893
 894                         return true;
 895                 }
 896
 897                 /* TODO: return false; if distance too big */
 898
 899                 curelem = SH_NEXT(tb, curelem, startelem);
 900         }
 901 }
 902
 903 /*
 904  * Delete entry from hash table by entry pointer
 905  */
 906 SH_SCOPE void
 907 SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
 908 {
 909         SH_ELEMENT_TYPE *lastentry = entry;
 910         uint32          hash = SH_ENTRY_HASH(tb, entry);
 911         uint32          startelem = SH_INITIAL_BUCKET(tb, hash);
 912         uint32          curelem;
 913
 914         /* Calculate the index of 'entry' */
 915         curelem = entry - &tb->data[0];
 916
 917         tb->members--;
 918
 919         /*
 920          * Backward shift following elements till either an empty element or an
 921          * element at its optimal position is encountered.
 922          *
 923          * While that sounds expensive, the average chain length is short, and
 924          * deletions would otherwise require tombstones.
 925          */
 926         while (true)
 927         {
 928                 SH_ELEMENT_TYPE *curentry;
 929                 uint32          curhash;
 930                 uint32          curoptimal;
 931
 932                 curelem = SH_NEXT(tb, curelem, startelem);
 933                 curentry = &tb->data[curelem];
 934
 935                 if (curentry->status != SH_STATUS_IN_USE)
 936                 {
 937                         lastentry->status = SH_STATUS_EMPTY;
 938                         break;
 939                 }
 940
 941                 curhash = SH_ENTRY_HASH(tb, curentry);
 942                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
 943
 944                 /* current is at optimal position, done */
 945                 if (curoptimal == curelem)
 946                 {
 947                         lastentry->status = SH_STATUS_EMPTY;
 948                         break;
 949                 }
 950
 951                 /* shift */
 952                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
 953
 954                 lastentry = curentry;
 955         }
 956 }
 957
 958 /*
 959  * Initialize iterator.
 960  */
 961 SH_SCOPE void
 962 SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
 963 {
 964         int                     i;
 965         uint64          startelem = PG_UINT64_MAX;
 966
 967         /*
 968          * Search for the first empty element. As deletions during iterations are
 969          * supported, we want to start/end at an element that cannot be affected
 970          * by elements being shifted.
 971          */
 972         for (i = 0; i < tb->size; i++)
 973         {
 974                 SH_ELEMENT_TYPE *entry = &tb->data[i];
 975
 976                 if (entry->status != SH_STATUS_IN_USE)
 977                 {
 978                         startelem = i;
 979                         break;
 980                 }
 981         }
 982
 983         Assert(startelem < SH_MAX_SIZE);
 984
 985         /*
 986          * Iterate backwards, that allows the current element to be deleted, even
 987          * if there are backward shifts
 988          */
 989         iter->cur = startelem;
 990         iter->end = iter->cur;
 991         iter->done = false;
 992 }
 993
 994 /*
 995  * Initialize iterator to a specific bucket. That's really only useful for
 996  * cases where callers are partially iterating over the hashspace, and that
 997  * iteration deletes and inserts elements based on visited entries. Doing that
 998  * repeatedly could lead to an unbalanced keyspace when always starting at the
 999  * same position.
1000  */
1001 SH_SCOPE void
1002 SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
1003 {
1004         /*
1005          * Iterate backwards, that allows the current element to be deleted, even
1006          * if there are backward shifts.
1007          */
1008         iter->cur = at & tb->sizemask;  /* ensure at is within a valid range */
1009         iter->end = iter->cur;
1010         iter->done = false;
1011 }
1012
1013 /*
1014  * Iterate over all entries in the hash-table. Return the next occupied entry,
1015  * or NULL if done.
1016  *
1017  * During iteration the current entry in the hash table may be deleted,
1018  * without leading to elements being skipped or returned twice.  Additionally
1019  * the rest of the table may be modified (i.e. there can be insertions or
1020  * deletions), but if so, there's neither a guarantee that all nodes are
1021  * visited at least once, nor a guarantee that a node is visited at most once.
1022  */
1023 SH_SCOPE        SH_ELEMENT_TYPE *
1024 SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
1025 {
1026         while (!iter->done)
1027         {
1028                 SH_ELEMENT_TYPE *elem;
1029
1030                 elem = &tb->data[iter->cur];
1031
1032                 /* next element in backward direction */
1033                 iter->cur = (iter->cur - 1) & tb->sizemask;
1034
1035                 if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
1036                         iter->done = true;
1037                 if (elem->status == SH_STATUS_IN_USE)
1038                 {
1039                         return elem;
1040                 }
1041         }
1042
1043         return NULL;
1044 }
1045
1046 /*
1047  * Report some statistics about the state of the hashtable. For
1048  * debugging/profiling purposes only.
1049  */
1050 SH_SCOPE void
1051 SH_STAT(SH_TYPE * tb)
1052 {
1053         uint32          max_chain_length = 0;
1054         uint32          total_chain_length = 0;
1055         double          avg_chain_length;
1056         double          fillfactor;
1057         uint32          i;
1058
1059         uint32     *collisions = palloc0(tb->size * sizeof(uint32));
1060         uint32          total_collisions = 0;
1061         uint32          max_collisions = 0;
1062         double          avg_collisions;
1063
1064         for (i = 0; i < tb->size; i++)
1065         {
1066                 uint32          hash;
1067                 uint32          optimal;
1068                 uint32          dist;
1069                 SH_ELEMENT_TYPE *elem;
1070
1071                 elem = &tb->data[i];
1072
1073                 if (elem->status != SH_STATUS_IN_USE)
1074                         continue;
1075
1076                 hash = SH_ENTRY_HASH(tb, elem);
1077                 optimal = SH_INITIAL_BUCKET(tb, hash);
1078                 dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
1079
1080                 if (dist > max_chain_length)
1081                         max_chain_length = dist;
1082                 total_chain_length += dist;
1083
1084                 collisions[optimal]++;
1085         }
1086
1087         for (i = 0; i < tb->size; i++)
1088         {
1089                 uint32          curcoll = collisions[i];
1090
1091                 if (curcoll == 0)
1092                         continue;
1093
1094                 /* single contained element is not a collision */
1095                 curcoll--;
1096                 total_collisions += curcoll;
1097                 if (curcoll > max_collisions)
1098                         max_collisions = curcoll;
1099         }
1100
1101         if (tb->members > 0)
1102         {
1103                 fillfactor = tb->members / ((double) tb->size);
1104                 avg_chain_length = ((double) total_chain_length) / tb->members;
1105                 avg_collisions = ((double) total_collisions) / tb->members;
1106         }
1107         else
1108         {
1109                 fillfactor = 0;
1110                 avg_chain_length = 0;
1111                 avg_collisions = 0;
1112         }
1113
1114         sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
1115                    tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
1116                    total_collisions, max_collisions, avg_collisions);
1117 }
1118
1119 #endif                                                  /* SH_DEFINE */
1120
1121
1122 /* undefine external parameters, so next hash table can be defined */
1123 #undef SH_PREFIX
1124 #undef SH_KEY_TYPE
1125 #undef SH_KEY
1126 #undef SH_ELEMENT_TYPE
1127 #undef SH_HASH_KEY
1128 #undef SH_SCOPE
1129 #undef SH_DECLARE
1130 #undef SH_DEFINE
1131 #undef SH_GET_HASH
1132 #undef SH_STORE_HASH
1133 #undef SH_USE_NONDEFAULT_ALLOCATOR
1134 #undef SH_EQUAL
1135
1136 /* undefine locally declared macros */
1137 #undef SH_MAKE_PREFIX
1138 #undef SH_MAKE_NAME
1139 #undef SH_MAKE_NAME_
1140 #undef SH_FILLFACTOR
1141 #undef SH_MAX_FILLFACTOR
1142 #undef SH_GROW_MAX_DIB
1143 #undef SH_GROW_MAX_MOVE
1144 #undef SH_GROW_MIN_FILLFACTOR
1145 #undef SH_MAX_SIZE
1146
1147 /* types */
1148 #undef SH_TYPE
1149 #undef SH_STATUS
1150 #undef SH_STATUS_EMPTY
1151 #undef SH_STATUS_IN_USE
1152 #undef SH_ITERATOR
1153
1154 /* external function names */
1155 #undef SH_CREATE
1156 #undef SH_DESTROY
1157 #undef SH_RESET
1158 #undef SH_INSERT
1159 #undef SH_INSERT_HASH
1160 #undef SH_DELETE_ITEM
1161 #undef SH_DELETE
1162 #undef SH_LOOKUP
1163 #undef SH_LOOKUP_HASH
1164 #undef SH_GROW
1165 #undef SH_START_ITERATE
1166 #undef SH_START_ITERATE_AT
1167 #undef SH_ITERATE
1168 #undef SH_ALLOCATE
1169 #undef SH_FREE
1170 #undef SH_STAT
1171
1172 /* internal function names */
1173 #undef SH_COMPUTE_PARAMETERS
1174 #undef SH_COMPARE_KEYS
1175 #undef SH_INITIAL_BUCKET
1176 #undef SH_NEXT
1177 #undef SH_PREV
1178 #undef SH_DISTANCE_FROM_OPTIMAL
1179 #undef SH_ENTRY_HASH
1180 #undef SH_INSERT_HASH_INTERNAL
1181 #undef SH_LOOKUP_HASH_INTERNAL