/*-------------------------------------------------------------------------
 *
 * dshash.c
 *	  Concurrent hash tables backed by dynamic shared memory areas.
 *
 * This is an open hashing hash table, with a linked list at each table
 * entry.  It supports dynamic resizing, as required to prevent the linked
 * lists from growing too long on average.  Currently, only growing is
 * supported: the hash table never becomes smaller.
 *
 * To deal with concurrency, it has a fixed size set of partitions, each of
 * which is independently locked.  Each bucket maps to a partition; so insert,
 * find and iterate operations normally only acquire one lock.  Therefore,
 * good concurrency is achieved whenever such operations don't collide at the
 * lock partition level.  However, when a resize operation begins, all
 * partition locks must be acquired simultaneously for a brief period.  This
 * is only expected to happen a small number of times until a stable size is
 * found, since growth is geometric.
 *
 * Future versions may support iterators and incremental resizing; for now
 * the implementation is minimalist.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/lib/dshash.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "common/hashfn.h"
#include "lib/dshash.h"
#include "storage/lwlock.h"
#include "utils/dsa.h"
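/*
 * Example usage (an illustrative sketch, not part of this module).  The
 * struct my_entry, its field layout, and LWTRANCHE_XXX are hypothetical;
 * the entry must begin with the key, and both backends are assumed to have
 * attached to the same dsa_area 'area'.
 *
 *     typedef struct my_entry { int key; int value; } my_entry;
 *
 *     static const dshash_parameters params = {
 *         .key_size = sizeof(int),
 *         .entry_size = sizeof(my_entry),
 *         .compare_function = dshash_memcmp,
 *         .hash_function = dshash_memhash,
 *         .copy_function = dshash_memcpy,
 *         .tranche_id = LWTRANCHE_XXX          // hypothetical tranche id
 *     };
 *
 *     // In the creating backend:
 *     dshash_table *table = dshash_create(area, &params, NULL);
 *     dshash_table_handle handle = dshash_get_hash_table_handle(table);
 *
 *     // In another backend, given the same area and the handle (typically
 *     // passed through shared memory):
 *     dshash_table *table = dshash_attach(area, &params, handle, NULL);
 */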
/*
 * An item in the hash table.  This wraps the user's entry object in an
 * envelope that holds a pointer back to the bucket and a pointer to the next
 * item in the same bucket.
 */
struct dshash_table_item
{
	/* The next item in the same bucket. */
	dsa_pointer next;
	/* The hashed key, to avoid having to recompute it. */
	dshash_hash hash;
	/* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
};
/*
 * The number of partitions for locking purposes.  This is set to match
 * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
 * the buffer pool must be good enough for any other purpose.  This could
 * become a runtime parameter in future.
 */
#define DSHASH_NUM_PARTITIONS_LOG2 7
#define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)

/* A magic value used to identify our hash tables. */
#define DSHASH_MAGIC 0x75ff6a20
/*
 * Tracking information for each lock partition.  Initially, each partition
 * corresponds to one bucket, but each time the hash table grows, the buckets
 * covered by each partition split so the number of buckets covered doubles.
 *
 * We might want to add padding here so that each partition is on a different
 * cache line, but doing so would bloat this structure considerably.
 */
typedef struct dshash_partition
{
	LWLock		lock;			/* Protects all buckets in this partition. */
	size_t		count;			/* # of items in this partition's buckets */
} dshash_partition;
/*
 * The head object for a hash table.  This will be stored in dynamic shared
 * memory.
 */
typedef struct dshash_table_control
{
	dshash_table_handle handle;
	uint32		magic;
	dshash_partition partitions[DSHASH_NUM_PARTITIONS];
	int			lwlock_tranche_id;

	/*
	 * The following members are written to only when ALL partition locks are
	 * held.  They can be read when any one partition lock is held.
	 */

	/* Number of buckets expressed as power of 2 (8 = 256 buckets). */
	size_t		size_log2;		/* log2(number of buckets) */
	dsa_pointer buckets;		/* current bucket array */
} dshash_table_control;
/*
 * Per-backend state for a dynamic hash table.
 */
struct dshash_table
{
	dsa_area   *area;			/* Backing dynamic shared memory area. */
	dshash_parameters params;	/* Parameters. */
	void	   *arg;			/* User-supplied data pointer. */
	dshash_table_control *control;	/* Control object in DSM. */
	dsa_pointer *buckets;		/* Current bucket pointers in DSM. */
	size_t		size_log2;		/* log2(number of buckets) */
};
/* Given a pointer to an item, find the entry (user data) it holds. */
#define ENTRY_FROM_ITEM(item) \
	((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))

/* Given a pointer to an entry, find the item that holds it. */
#define ITEM_FROM_ENTRY(entry) \
	((dshash_table_item *)((char *)(entry) - \
						   MAXALIGN(sizeof(dshash_table_item))))
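/*
 * Layout sketch (illustrative only): each allocation made for an entry looks
 * like
 *
 *     +---------------------------------------+-------------------------+
 *     | dshash_table_item (next, hash) + pad  | user entry (entry_size) |
 *     +---------------------------------------+-------------------------+
 *     ^ item                                  ^ ENTRY_FROM_ITEM(item)
 *
 * so the two macros above simply add or subtract
 * MAXALIGN(sizeof(dshash_table_item)).
 */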
/* How many resize operations (bucket splits) have there been? */
#define NUM_SPLITS(size_log2) \
	(size_log2 - DSHASH_NUM_PARTITIONS_LOG2)

/* How many buckets are there in a given size? */
#define NUM_BUCKETS(size_log2) \
	(((size_t) 1) << (size_log2))

/* How many buckets are there in each partition at a given size? */
#define BUCKETS_PER_PARTITION(size_log2) \
	(((size_t) 1) << NUM_SPLITS(size_log2))

/* Max entries before we need to grow.  Half + quarter = 75% load factor. */
#define MAX_COUNT_PER_PARTITION(hash_table) \
	(BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
	 BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
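/*
 * Worked example (illustrative): with size_log2 = 10 there are 1024 buckets,
 * so BUCKETS_PER_PARTITION = 1 << (10 - 7) = 8 and MAX_COUNT_PER_PARTITION =
 * 8 / 2 + 8 / 4 = 6, i.e. a partition triggers a resize once it holds more
 * than 6 items across its 8 buckets, which is the 75% load factor.
 */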
/* Choose partition based on the highest order bits of the hash. */
#define PARTITION_FOR_HASH(hash) \
	(hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))

/*
 * Find the bucket index for a given hash and table size.  Each time the table
 * doubles in size, the appropriate bucket for a given hash value doubles and
 * possibly adds one, depending on the newly revealed bit, so that all buckets
 * are split evenly.
 */
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
	(hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))

/* The index of the first bucket in a given partition. */
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
	((partition) << NUM_SPLITS(size_log2))

/* Choose partition based on bucket index. */
#define PARTITION_FOR_BUCKET_INDEX(bucket_idx, size_log2) \
	((bucket_idx) >> NUM_SPLITS(size_log2))

/* The head of the active bucket for a given hash value (lvalue). */
#define BUCKET_FOR_HASH(hash_table, hash) \
	(hash_table->buckets[ \
		BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
									   hash_table->size_log2)])
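/*
 * Bit layout sketch (illustrative, assuming dshash_hash is 32 bits wide):
 * the top DSHASH_NUM_PARTITIONS_LOG2 = 7 bits of the hash select the
 * partition, and the top size_log2 bits select the bucket.  For example,
 * with size_log2 = 9 and hash = 0xDEADBEEF:
 *
 *     PARTITION_FOR_HASH(hash)                = 0xDEADBEEF >> 25 = 111
 *     BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, 9) = 0xDEADBEEF >> 23 = 445
 *     PARTITION_FOR_BUCKET_INDEX(445, 9)      = 445 >> 2         = 111
 *
 * so every bucket derived from a hash lands in that hash's partition.
 */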
static void delete_item(dshash_table *hash_table,
						dshash_table_item *item);
static void resize(dshash_table *hash_table, size_t new_size_log2);
static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
												const void *key,
												dsa_pointer item_pointer);
static void insert_item_into_bucket(dshash_table *hash_table,
									dsa_pointer item_pointer,
									dshash_table_item *item,
									dsa_pointer *bucket);
static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
											 const void *key,
											 dsa_pointer *bucket);
static bool delete_key_from_bucket(dshash_table *hash_table,
								   const void *key,
								   dsa_pointer *bucket_head);
static bool delete_item_from_bucket(dshash_table *hash_table,
									dshash_table_item *item,
									dsa_pointer *bucket_head);
static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
static inline bool equal_keys(dshash_table *hash_table,
							  const void *a, const void *b);
static inline void copy_key(dshash_table *hash_table, void *dest,
							const void *src);

#define PARTITION_LOCK(hash_table, i) \
	(&(hash_table)->control->partitions[(i)].lock)

#define ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table) \
	Assert(!LWLockAnyHeldByMe(&(hash_table)->control->partitions[0].lock, \
							  DSHASH_NUM_PARTITIONS, sizeof(dshash_partition)))
/*
 * Create a new hash table backed by the given dynamic shared area, with the
 * given parameters.  The returned object is allocated in backend-local memory
 * using the current MemoryContext.  'arg' will be passed through to the
 * compare, hash, and copy functions.
 */
dshash_table *
dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
{
	dshash_table *hash_table;
	dsa_pointer control;

	/* Allocate the backend-local object representing the hash table. */
	hash_table = palloc(sizeof(dshash_table));

	/* Allocate the control object in shared memory. */
	control = dsa_allocate(area, sizeof(dshash_table_control));

	/* Set up the local and shared hash table structs. */
	hash_table->area = area;
	hash_table->params = *params;
	hash_table->arg = arg;
	hash_table->control = dsa_get_address(area, control);
	hash_table->control->handle = control;
	hash_table->control->magic = DSHASH_MAGIC;
	hash_table->control->lwlock_tranche_id = params->tranche_id;

	/* Set up the array of lock partitions. */
	{
		dshash_partition *partitions = hash_table->control->partitions;
		int			tranche_id = hash_table->control->lwlock_tranche_id;
		int			i;

		for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		{
			LWLockInitialize(&partitions[i].lock, tranche_id);
			partitions[i].count = 0;
		}
	}

	/*
	 * Set up the initial array of buckets.  Our initial size is the same as
	 * the number of partitions.
	 */
	hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
	hash_table->control->buckets =
		dsa_allocate_extended(area,
							  sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
							  DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
	if (!DsaPointerIsValid(hash_table->control->buckets))
	{
		dsa_free(area, control);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory"),
				 errdetail("Failed on DSA request of size %zu.",
						   sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
	}
	hash_table->buckets = dsa_get_address(area,
										  hash_table->control->buckets);
	hash_table->size_log2 = hash_table->control->size_log2;

	return hash_table;
}
/*
 * Attach to an existing hash table using a handle.  The returned object is
 * allocated in backend-local memory using the current MemoryContext.  'arg'
 * will be passed through to the compare and hash functions.
 */
dshash_table *
dshash_attach(dsa_area *area, const dshash_parameters *params,
			  dshash_table_handle handle, void *arg)
{
	dshash_table *hash_table;
	dsa_pointer control;

	/* Allocate the backend-local object representing the hash table. */
	hash_table = palloc(sizeof(dshash_table));

	/* Find the control object in shared memory. */
	control = handle;

	/* Set up the local hash table struct. */
	hash_table->area = area;
	hash_table->params = *params;
	hash_table->arg = arg;
	hash_table->control = dsa_get_address(area, control);
	Assert(hash_table->control->magic == DSHASH_MAGIC);

	/*
	 * These will later be set to the correct values by
	 * ensure_valid_bucket_pointers(), at which time we'll be holding a
	 * partition lock for interlocking against concurrent resizing.
	 */
	hash_table->buckets = NULL;
	hash_table->size_log2 = 0;

	return hash_table;
}
/*
 * Detach from a hash table.  This frees backend-local resources associated
 * with the hash table, but the hash table will continue to exist until it is
 * either explicitly destroyed (by a backend that is still attached to it), or
 * the area that backs it is returned to the operating system.
 */
void
dshash_detach(dshash_table *hash_table)
{
	ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);

	/* The hash table may have been destroyed.  Just free local memory. */
	pfree(hash_table);
}
/*
 * Destroy a hash table, returning all memory to the area.  The caller must be
 * certain that no other backend will attempt to access the hash table before
 * calling this function.  Other backends must explicitly call dshash_detach
 * to free up backend-local memory associated with the hash table.  The
 * backend that calls dshash_destroy must not call dshash_detach.
 */
void
dshash_destroy(dshash_table *hash_table)
{
	size_t		size;
	size_t		i;

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	ensure_valid_bucket_pointers(hash_table);

	/* Free all the entries. */
	size = NUM_BUCKETS(hash_table->size_log2);
	for (i = 0; i < size; ++i)
	{
		dsa_pointer item_pointer = hash_table->buckets[i];

		while (DsaPointerIsValid(item_pointer))
		{
			dshash_table_item *item;
			dsa_pointer next_item_pointer;

			item = dsa_get_address(hash_table->area, item_pointer);
			next_item_pointer = item->next;
			dsa_free(hash_table->area, item_pointer);
			item_pointer = next_item_pointer;
		}
	}

	/*
	 * Vandalize the control block to help catch programming errors where
	 * other backends access the memory formerly occupied by this hash table.
	 */
	hash_table->control->magic = 0;

	/* Free the active table and control object. */
	dsa_free(hash_table->area, hash_table->control->buckets);
	dsa_free(hash_table->area, hash_table->control->handle);

	pfree(hash_table);
}
/*
 * Get a handle that can be used by other processes to attach to this hash
 * table.
 */
dshash_table_handle
dshash_get_hash_table_handle(dshash_table *hash_table)
{
	Assert(hash_table->control->magic == DSHASH_MAGIC);

	return hash_table->control->handle;
}
/*
 * Look up an entry, given a key.  Returns a pointer to an entry if one can be
 * found with the given key.  Returns NULL if the key is not found.  If a
 * non-NULL value is returned, the entry is locked and must be released by
 * calling dshash_release_lock.  If an error is raised before
 * dshash_release_lock is called, the lock will be released automatically, but
 * the caller must take care to ensure that the entry is not left corrupted.
 * The lock mode is either shared or exclusive depending on 'exclusive'.
 *
 * The caller must not hold a lock already.
 *
 * Note that the lock held is in fact an LWLock, so interrupts will be held on
 * return from this function, and not resumed until dshash_release_lock is
 * called.  It is a very good idea for the caller to release the lock quickly.
 */
void *
dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
{
	dshash_hash hash;
	size_t		partition;
	dshash_table_item *item;

	hash = hash_key(hash_table, key);
	partition = PARTITION_FOR_HASH(hash);

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);

	LWLockAcquire(PARTITION_LOCK(hash_table, partition),
				  exclusive ? LW_EXCLUSIVE : LW_SHARED);
	ensure_valid_bucket_pointers(hash_table);

	/* Search the active bucket. */
	item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));

	if (!item)
	{
		/* Not found; release the lock and report failure. */
		LWLockRelease(PARTITION_LOCK(hash_table, partition));
		return NULL;
	}

	/* The caller will free the lock by calling dshash_release_lock. */
	return ENTRY_FROM_ITEM(item);
}
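/*
 * Example usage (an illustrative sketch continuing the hypothetical my_entry
 * example above):
 *
 *     int         my_key = 123;
 *     my_entry   *entry;
 *
 *     entry = dshash_find(table, &my_key, false);
 *     if (entry != NULL)
 *     {
 *         // read the entry while holding the shared partition lock
 *         int value = entry->value;
 *
 *         dshash_release_lock(table, entry);
 *     }
 */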
/*
 * Returns a pointer to an exclusively locked item which must be released with
 * dshash_release_lock.  If the key is found in the hash table, 'found' is set
 * to true and a pointer to the existing entry is returned.  If the key is not
 * found, 'found' is set to false, and a pointer to a newly created entry is
 * returned.
 *
 * Notes above dshash_find() regarding locking and error handling equally
 * apply here.
 */
void *
dshash_find_or_insert(dshash_table *hash_table,
					  const void *key,
					  bool *found)
{
	dshash_hash hash;
	size_t		partition_index;
	dshash_partition *partition;
	dshash_table_item *item;

	hash = hash_key(hash_table, key);
	partition_index = PARTITION_FOR_HASH(hash);
	partition = &hash_table->control->partitions[partition_index];

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);

restart:
	LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
				  LW_EXCLUSIVE);
	ensure_valid_bucket_pointers(hash_table);

	/* Search the active bucket. */
	item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));

	if (item)
		*found = true;
	else
	{
		*found = false;

		/* Check if we are getting too full. */
		if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
		{
			/*
			 * The load factor (= keys / buckets) for all buckets protected by
			 * this partition is > 0.75.  Presumably the same applies
			 * generally across the whole hash table (though we don't attempt
			 * to track that directly to avoid contention on some kind of
			 * central counter; we just assume that this partition is
			 * representative).  This is a good time to resize.
			 *
			 * Give up our existing lock first, because resizing needs to
			 * reacquire all the locks in the right order to avoid deadlocks.
			 */
			LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
			resize(hash_table, hash_table->size_log2 + 1);

			goto restart;
		}

		/* Finally we can try to insert the new item. */
		item = insert_into_bucket(hash_table, key,
								  &BUCKET_FOR_HASH(hash_table, hash));
		item->hash = hash;
		/* Adjust per-lock-partition counter for load factor knowledge. */
		++partition->count;
	}

	/* The caller must release the lock with dshash_release_lock. */
	return ENTRY_FROM_ITEM(item);
}
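/*
 * Example usage (an illustrative sketch; my_entry and my_key are the
 * hypothetical names used above).  Only the key is copied into a newly
 * created entry, so the caller initializes the rest:
 *
 *     bool        found;
 *     my_entry   *entry;
 *
 *     entry = dshash_find_or_insert(table, &my_key, &found);
 *     if (!found)
 *         entry->value = 0;      // initialize fields the key copy didn't cover
 *     entry->value++;
 *     dshash_release_lock(table, entry);
 */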
/*
 * Remove an entry by key.  Returns true if the key was found and the
 * corresponding entry was removed.
 *
 * To delete an entry that you already have a pointer to, see
 * dshash_delete_entry.
 */
bool
dshash_delete_key(dshash_table *hash_table, const void *key)
{
	dshash_hash hash;
	size_t		partition;
	bool		found;

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);

	hash = hash_key(hash_table, key);
	partition = PARTITION_FOR_HASH(hash);

	LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
	ensure_valid_bucket_pointers(hash_table);

	if (delete_key_from_bucket(hash_table, key,
							   &BUCKET_FOR_HASH(hash_table, hash)))
	{
		Assert(hash_table->control->partitions[partition].count > 0);
		found = true;
		--hash_table->control->partitions[partition].count;
	}
	else
		found = false;

	LWLockRelease(PARTITION_LOCK(hash_table, partition));

	return found;
}
/*
 * Remove an entry.  The entry must already be exclusively locked, and must
 * have been obtained by dshash_find or dshash_find_or_insert.  Note that this
 * function releases the lock just like dshash_release_lock.
 *
 * To delete an entry by key, see dshash_delete_key.
 */
void
dshash_delete_entry(dshash_table *hash_table, void *entry)
{
	dshash_table_item *item = ITEM_FROM_ENTRY(entry);
	size_t		partition = PARTITION_FOR_HASH(item->hash);

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
								LW_EXCLUSIVE));

	delete_item(hash_table, item);
	LWLockRelease(PARTITION_LOCK(hash_table, partition));
}
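/*
 * Example usage (an illustrative sketch; my_entry and my_key are the
 * hypothetical names used above):
 *
 *     // remove by key, without having looked the entry up first
 *     bool removed = dshash_delete_key(table, &my_key);
 *
 *     // or remove an entry found earlier with an exclusive lock
 *     my_entry *entry = dshash_find(table, &my_key, true);
 *     if (entry != NULL)
 *         dshash_delete_entry(table, entry);   // also releases the lock
 */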
/*
 * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
 */
void
dshash_release_lock(dshash_table *hash_table, void *entry)
{
	dshash_table_item *item = ITEM_FROM_ENTRY(entry);
	size_t		partition_index = PARTITION_FOR_HASH(item->hash);

	Assert(hash_table->control->magic == DSHASH_MAGIC);

	LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
}
/*
 * A compare function that forwards to memcmp.
 */
int
dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
{
	return memcmp(a, b, size);
}

/*
 * A hash function that forwards to tag_hash.
 */
dshash_hash
dshash_memhash(const void *v, size_t size, void *arg)
{
	return tag_hash(v, size);
}

/*
 * A copy function that forwards to memcpy.
 */
void
dshash_memcpy(void *dest, const void *src, size_t size, void *arg)
{
	(void) memcpy(dest, src, size);
}

/*
 * A compare function that forwards to strcmp.
 */
int
dshash_strcmp(const void *a, const void *b, size_t size, void *arg)
{
	Assert(strlen((const char *) a) < size);
	Assert(strlen((const char *) b) < size);

	return strcmp((const char *) a, (const char *) b);
}

/*
 * A hash function that forwards to string_hash.
 */
dshash_hash
dshash_strhash(const void *v, size_t size, void *arg)
{
	Assert(strlen((const char *) v) < size);

	return string_hash((const char *) v, size);
}

/*
 * A copy function that forwards to strcpy.
 */
void
dshash_strcpy(void *dest, const void *src, size_t size, void *arg)
{
	Assert(strlen((const char *) src) < size);

	(void) strcpy((char *) dest, (const char *) src);
}
/*
 * Sequentially scan through a dshash table, returning all the elements one by
 * one; returns NULL when all elements have been returned.
 *
 * dshash_seq_term needs to be called when a scan is finished.  The caller may
 * delete returned elements in the middle of a scan by using
 * dshash_delete_current() if exclusive = true.
 */
void
dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
				bool exclusive)
{
	status->hash_table = hash_table;
	status->curbucket = 0;
	status->nbuckets = 0;
	status->curitem = NULL;
	status->pnextitem = InvalidDsaPointer;
	status->curpartition = -1;
	status->exclusive = exclusive;
}
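/*
 * Example usage (an illustrative sketch; my_entry and its 'value' field are
 * hypothetical).  Each returned entry is locked until the next call; a scan
 * that runs until dshash_seq_next() returns NULL is terminated by that call,
 * while a scan that is abandoned early must be ended with dshash_seq_term():
 *
 *     dshash_seq_status scan;
 *     my_entry   *entry;
 *
 *     dshash_seq_init(&scan, table, true);
 *     while ((entry = dshash_seq_next(&scan)) != NULL)
 *     {
 *         if (entry->value < 0)
 *             dshash_delete_current(&scan);    // allowed: exclusive = true
 *         else if (entry->value == 42)
 *         {
 *             dshash_seq_term(&scan);          // leaving the scan early
 *             break;
 *         }
 *     }
 */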
/*
 * Returns the next element.
 *
 * Returned elements are locked and the caller may not release the lock.  It
 * is released by future calls to dshash_seq_next() or dshash_seq_term().
 */
void *
dshash_seq_next(dshash_seq_status *status)
{
	dsa_pointer next_item_pointer;

	/*
	 * Not yet holding any partition locks.  Need to determine the size of the
	 * hash table; it could have been resized since we last looked.  Since we
	 * iterate in partition order, we can start by unconditionally locking
	 * partition 0.
	 *
	 * Once we hold the lock, no resizing can happen until the scan ends.  So
	 * we don't need to repeatedly call ensure_valid_bucket_pointers().
	 */
	if (status->curpartition == -1)
	{
		Assert(status->curbucket == 0);
		ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(status->hash_table);

		status->curpartition = 0;

		LWLockAcquire(PARTITION_LOCK(status->hash_table,
									 status->curpartition),
					  status->exclusive ? LW_EXCLUSIVE : LW_SHARED);

		ensure_valid_bucket_pointers(status->hash_table);

		status->nbuckets =
			NUM_BUCKETS(status->hash_table->control->size_log2);
		next_item_pointer = status->hash_table->buckets[status->curbucket];
	}
	else
		next_item_pointer = status->pnextitem;

	Assert(LWLockHeldByMeInMode(PARTITION_LOCK(status->hash_table,
											   status->curpartition),
								status->exclusive ? LW_EXCLUSIVE : LW_SHARED));

	/* Move to the next bucket if we finished the current bucket */
	while (!DsaPointerIsValid(next_item_pointer))
	{
		int			next_partition;

		if (++status->curbucket >= status->nbuckets)
		{
			/* all buckets have been scanned. finish. */
			dshash_seq_term(status);
			return NULL;
		}

		/* Check if we need to move to the next partition */
		next_partition =
			PARTITION_FOR_BUCKET_INDEX(status->curbucket,
									   status->hash_table->size_log2);

		if (status->curpartition != next_partition)
		{
			/*
			 * Move to the next partition.  Lock the next partition then
			 * release the current one, not in the reverse order, to avoid
			 * concurrent resizing.  Avoid deadlock by taking the locks in
			 * the same order as resize().
			 */
			LWLockAcquire(PARTITION_LOCK(status->hash_table,
										 next_partition),
						  status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
			LWLockRelease(PARTITION_LOCK(status->hash_table,
										 status->curpartition));
			status->curpartition = next_partition;
		}

		next_item_pointer = status->hash_table->buckets[status->curbucket];
	}

	status->curitem =
		dsa_get_address(status->hash_table->area, next_item_pointer);

	/*
	 * The caller may delete the item.  Store the next item in case of
	 * deletion.
	 */
	status->pnextitem = status->curitem->next;

	return ENTRY_FROM_ITEM(status->curitem);
}
/*
 * Terminates the seqscan and releases all locks.
 *
 * Needs to be called after finishing or when exiting a seqscan.
 */
void
dshash_seq_term(dshash_seq_status *status)
{
	if (status->curpartition >= 0)
		LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
}
/*
 * Remove the current entry of the seq scan.
 */
void
dshash_delete_current(dshash_seq_status *status)
{
	dshash_table *hash_table = status->hash_table;
	dshash_table_item *item = status->curitem;
	size_t		partition PG_USED_FOR_ASSERTS_ONLY;

	partition = PARTITION_FOR_HASH(item->hash);

	Assert(status->exclusive);
	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
								LW_EXCLUSIVE));

	delete_item(hash_table, item);
}
/*
 * Print debugging information about the internal state of the hash table to
 * stderr.  The caller must hold no partition locks.
 */
void
dshash_dump(dshash_table *hash_table)
{
	size_t		i;
	size_t		j;

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	ASSERT_NO_PARTITION_LOCKS_HELD_BY_ME(hash_table);

	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
	{
		Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
		LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
	}

	ensure_valid_bucket_pointers(hash_table);

	fprintf(stderr,
			"hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
	{
		dshash_partition *partition = &hash_table->control->partitions[i];
		size_t		begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
		size_t		end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);

		fprintf(stderr, "  partition %zu\n", i);
		fprintf(stderr,
				"    active buckets (key count = %zu)\n", partition->count);

		for (j = begin; j < end; ++j)
		{
			size_t		count = 0;
			dsa_pointer bucket = hash_table->buckets[j];

			while (DsaPointerIsValid(bucket))
			{
				dshash_table_item *item;

				item = dsa_get_address(hash_table->area, bucket);

				bucket = item->next;
				++count;
			}
			fprintf(stderr, "      bucket %zu (key count = %zu)\n", j, count);
		}
	}

	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		LWLockRelease(PARTITION_LOCK(hash_table, i));
}
/*
 * Delete a locked item to which we have a pointer.
 */
static void
delete_item(dshash_table *hash_table, dshash_table_item *item)
{
	size_t		hash = item->hash;
	size_t		partition = PARTITION_FOR_HASH(hash);

	Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));

	if (delete_item_from_bucket(hash_table, item,
								&BUCKET_FOR_HASH(hash_table, hash)))
	{
		Assert(hash_table->control->partitions[partition].count > 0);
		--hash_table->control->partitions[partition].count;
	}
	else
	{
		/* The item must be in its bucket; anything else is a bug. */
		Assert(false);
	}
}
/*
 * Grow the hash table if necessary to the requested number of buckets.  The
 * requested size must be double some previously observed size.
 *
 * Must be called without any partition lock held.
 */
static void
resize(dshash_table *hash_table, size_t new_size_log2)
{
	dsa_pointer old_buckets;
	dsa_pointer new_buckets_shared;
	dsa_pointer *new_buckets;
	size_t		size;
	size_t		new_size = ((size_t) 1) << new_size_log2;
	size_t		i;

	/*
	 * Acquire the locks for all lock partitions.  This is expensive, but we
	 * shouldn't have to do it many times.
	 */
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
	{
		Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));

		LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
		if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
		{
			/*
			 * Another backend has already increased the size; we can avoid
			 * obtaining all the locks and return early.
			 */
			LWLockRelease(PARTITION_LOCK(hash_table, 0));
			return;
		}
	}

	Assert(new_size_log2 == hash_table->control->size_log2 + 1);

	/* Allocate the space for the new table. */
	new_buckets_shared =
		dsa_allocate_extended(hash_table->area,
							  sizeof(dsa_pointer) * new_size,
							  DSA_ALLOC_HUGE | DSA_ALLOC_ZERO);
	new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);

	/*
	 * We've allocated the new bucket array; all that remains to do now is to
	 * reinsert all items, which amounts to adjusting all the pointers.
	 */
	size = ((size_t) 1) << hash_table->control->size_log2;
	for (i = 0; i < size; ++i)
	{
		dsa_pointer item_pointer = hash_table->buckets[i];

		while (DsaPointerIsValid(item_pointer))
		{
			dshash_table_item *item;
			dsa_pointer next_item_pointer;

			item = dsa_get_address(hash_table->area, item_pointer);
			next_item_pointer = item->next;
			insert_item_into_bucket(hash_table, item_pointer, item,
									&new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
																				new_size_log2)]);
			item_pointer = next_item_pointer;
		}
	}

	/* Swap the hash table into place and free the old one. */
	old_buckets = hash_table->control->buckets;
	hash_table->control->buckets = new_buckets_shared;
	hash_table->control->size_log2 = new_size_log2;
	hash_table->buckets = new_buckets;
	dsa_free(hash_table->area, old_buckets);

	/* Release all the locks. */
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		LWLockRelease(PARTITION_LOCK(hash_table, i));
}
/*
 * Make sure that our backend-local bucket pointers are up to date.  The
 * caller must have locked one lock partition, which prevents resize() from
 * running concurrently.
 */
static inline void
ensure_valid_bucket_pointers(dshash_table *hash_table)
{
	if (hash_table->size_log2 != hash_table->control->size_log2)
	{
		hash_table->buckets = dsa_get_address(hash_table->area,
											  hash_table->control->buckets);
		hash_table->size_log2 = hash_table->control->size_log2;
	}
}
/*
 * Scan a locked bucket for a match, using the provided compare function.
 */
static inline dshash_table_item *
find_in_bucket(dshash_table *hash_table, const void *key,
			   dsa_pointer item_pointer)
{
	while (DsaPointerIsValid(item_pointer))
	{
		dshash_table_item *item;

		item = dsa_get_address(hash_table->area, item_pointer);
		if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
			return item;
		item_pointer = item->next;
	}
	return NULL;
}
/*
 * Insert an already-allocated item into a bucket.
 */
static void
insert_item_into_bucket(dshash_table *hash_table,
						dsa_pointer item_pointer,
						dshash_table_item *item,
						dsa_pointer *bucket)
{
	Assert(item == dsa_get_address(hash_table->area, item_pointer));

	item->next = *bucket;
	*bucket = item_pointer;
}
/*
 * Allocate space for an entry with the given key and insert it into the
 * provided bucket.
 */
static dshash_table_item *
insert_into_bucket(dshash_table *hash_table,
				   const void *key,
				   dsa_pointer *bucket)
{
	dsa_pointer item_pointer;
	dshash_table_item *item;

	item_pointer = dsa_allocate(hash_table->area,
								hash_table->params.entry_size +
								MAXALIGN(sizeof(dshash_table_item)));
	item = dsa_get_address(hash_table->area, item_pointer);
	copy_key(hash_table, ENTRY_FROM_ITEM(item), key);
	insert_item_into_bucket(hash_table, item_pointer, item, bucket);

	return item;
}
/*
 * Search a bucket for a matching key and delete it.
 */
static bool
delete_key_from_bucket(dshash_table *hash_table,
					   const void *key,
					   dsa_pointer *bucket_head)
{
	while (DsaPointerIsValid(*bucket_head))
	{
		dshash_table_item *item;

		item = dsa_get_address(hash_table->area, *bucket_head);

		if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
		{
			dsa_pointer next;

			next = item->next;
			dsa_free(hash_table->area, *bucket_head);
			*bucket_head = next;

			return true;
		}
		bucket_head = &item->next;
	}
	return false;
}
/*
 * Delete the specified item from the bucket.
 */
static bool
delete_item_from_bucket(dshash_table *hash_table,
						dshash_table_item *item,
						dsa_pointer *bucket_head)
{
	while (DsaPointerIsValid(*bucket_head))
	{
		dshash_table_item *bucket_item;

		bucket_item = dsa_get_address(hash_table->area, *bucket_head);

		if (bucket_item == item)
		{
			dsa_pointer next;

			next = item->next;
			dsa_free(hash_table->area, *bucket_head);
			*bucket_head = next;
			return true;
		}
		bucket_head = &bucket_item->next;
	}
	return false;
}
/*
 * Compute the hash value for a key.
 */
static inline dshash_hash
hash_key(dshash_table *hash_table, const void *key)
{
	return hash_table->params.hash_function(key,
											hash_table->params.key_size,
											hash_table->arg);
}
/*
 * Check whether two keys compare equal.
 */
static inline bool
equal_keys(dshash_table *hash_table, const void *a, const void *b)
{
	return hash_table->params.compare_function(a, b,
											   hash_table->params.key_size,
											   hash_table->arg) == 0;
}
/*
 * Copy a key.
 */
static inline void
copy_key(dshash_table *hash_table, void *dest, const void *src)
{
	hash_table->params.copy_function(dest, src,
									 hash_table->params.key_size,
									 hash_table->arg);
}