1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright 2023 Red Hat
9 * Hash table implementation of a map from integers to pointers, implemented using the Hopscotch
10 * Hashing algorithm by Herlihy, Shavit, and Tzafrir (see
11 * http://en.wikipedia.org/wiki/Hopscotch_hashing). This implementation does not contain any of the
12 * locking/concurrency features of the algorithm, just the collision resolution scheme.
14 * Hopscotch Hashing is based on hashing with open addressing and linear probing. All the entries
15 * are stored in a fixed array of buckets, with no dynamic allocation for collisions. Unlike linear
16 * probing, all the entries that hash to a given bucket are stored within a fixed neighborhood
17 * starting at that bucket. Chaining is effectively represented as a bit vector relative to each
18 * bucket instead of as pointers or explicit offsets.
20 * When an empty bucket cannot be found within a given neighborhood, subsequent neighborhoods are
21 * searched, and one or more entries will "hop" into those neighborhoods. When this process works,
22 * an empty bucket will move into the desired neighborhood, allowing the entry to be added. When
23 * that process fails (typically when the buckets are around 90% full), the table must be resized
24 * and the all entries rehashed and added to the expanded table.
26 * Unlike linear probing, the number of buckets that must be searched in the worst case has a fixed
27 * upper bound (the size of the neighborhood). Those entries occupy a small number of memory cache
28 * lines, leading to improved use of the cache (fewer misses on both successful and unsuccessful
29 * searches). Hopscotch hashing outperforms linear probing at much higher load factors, so even
30 * with the increased memory burden for maintaining the hop vectors, less memory is needed to
31 * achieve that performance. Hopscotch is also immune to "contamination" from deleting entries
32 * since entries are genuinely removed instead of being replaced by a placeholder.
34 * The published description of the algorithm used a bit vector, but the paper alludes to an offset
35 * scheme which is used by this implementation. Since the entries in the neighborhood are within N
36 * entries of the hash bucket at the start of the neighborhood, a pair of small offset fields each
37 * log2(N) bits wide is all that's needed to maintain the hops as a linked list. In order to encode
38 * "no next hop" (i.e. NULL) as the natural initial value of zero, the offsets are biased by one
39 * (i.e. 0 => NULL, 1 => offset=0, 2 => offset=1, etc.) We can represent neighborhoods of up to 255
40 * entries with just 8+8=16 bits per entry. The hop list is sorted by hop offset so the first entry
41 * in the list is always the bucket closest to the start of the neighborhood.
43 * While individual accesses tend to be very fast, the table resize operations are very, very
44 * expensive. If an upper bound on the latency of adding an entry to the table is needed, we either
45 * need to ensure the table is pre-sized to be large enough so no resize is ever needed, or we'll
46 * need to develop an approach to incrementally resize the table.
51 #include <linux/minmax.h>
55 #include "memory-alloc.h"
57 #include "permassert.h"
59 #define DEFAULT_CAPACITY 16 /* the number of neighborhoods in a new table */
60 #define NEIGHBORHOOD 255 /* the number of buckets in each neighborhood */
61 #define MAX_PROBES 1024 /* limit on the number of probes for a free bucket */
62 #define NULL_HOP_OFFSET 0 /* the hop offset value terminating the hop list */
63 #define DEFAULT_LOAD 75 /* a compromise between memory use and performance */
66 * struct bucket - hash bucket
68 * Buckets are packed together to reduce memory usage and improve cache efficiency. It would be
69 * tempting to encode the hop offsets separately and maintain alignment of key/value pairs, but
70 * it's crucial to keep the hop fields near the buckets that they use them so they'll tend to share
75 * @first_hop: The biased offset of the first entry in the hop list of the neighborhood
76 * that hashes to this bucket.
79 /** @next_hop: The biased offset of the next bucket in the hop list. */
81 /** @key: The key stored in this bucket. */
83 /** @value: The value stored in this bucket (NULL if empty). */
88 * struct int_map - The concrete definition of the opaque int_map type.
90 * To avoid having to wrap the neighborhoods of the last entries back around to the start of the
91 * bucket array, we allocate a few more buckets at the end of the array instead, which is why
92 * capacity and bucket_count are different.
95 /** @size: The number of entries stored in the map. */
97 /** @capacity: The number of neighborhoods in the map. */
99 /** @bucket_count: The number of buckets in the bucket array. */
101 /** @buckets: The array of hash buckets. */
102 struct bucket
*buckets
;
106 * mix() - The Google CityHash 16-byte hash mixing function.
107 * @input1: The first input value.
108 * @input2: The second input value.
110 * Return: A hash of the two inputs.
112 static u64
mix(u64 input1
, u64 input2
)
114 static const u64 CITY_MULTIPLIER
= 0x9ddfea08eb382d69ULL
;
115 u64 hash
= (input1
^ input2
);
117 hash
*= CITY_MULTIPLIER
;
118 hash
^= (hash
>> 47);
120 hash
*= CITY_MULTIPLIER
;
121 hash
^= (hash
>> 47);
122 hash
*= CITY_MULTIPLIER
;
127 * hash_key() - Calculate a 64-bit non-cryptographic hash value for the provided 64-bit integer
129 * @key: The mapping key.
131 * The implementation is based on Google's CityHash, only handling the specific case of an 8-byte
134 * Return: The hash of the mapping key.
136 static u64
hash_key(u64 key
)
139 * Aliasing restrictions forbid us from casting pointer types, so use a union to convert a
140 * single u64 to two u32 values.
145 } pun
= {.u64
= key
};
147 return mix(sizeof(key
) + (((u64
) pun
.u32
[0]) << 3), pun
.u32
[1]);
151 * allocate_buckets() - Initialize an int_map.
152 * @map: The map to initialize.
153 * @capacity: The initial capacity of the map.
155 * Return: VDO_SUCCESS or an error code.
157 static int allocate_buckets(struct int_map
*map
, size_t capacity
)
160 map
->capacity
= capacity
;
163 * Allocate NEIGHBORHOOD - 1 extra buckets so the last bucket can have a full neighborhood
164 * without have to wrap back around to element zero.
166 map
->bucket_count
= capacity
+ (NEIGHBORHOOD
- 1);
167 return vdo_allocate(map
->bucket_count
, struct bucket
,
168 "struct int_map buckets", &map
->buckets
);
172 * vdo_int_map_create() - Allocate and initialize an int_map.
173 * @initial_capacity: The number of entries the map should initially be capable of holding (zero
174 * tells the map to use its own small default).
175 * @map_ptr: Output, a pointer to hold the new int_map.
177 * Return: VDO_SUCCESS or an error code.
179 int vdo_int_map_create(size_t initial_capacity
, struct int_map
**map_ptr
)
185 result
= vdo_allocate(1, struct int_map
, "struct int_map", &map
);
186 if (result
!= VDO_SUCCESS
)
189 /* Use the default capacity if the caller did not specify one. */
190 capacity
= (initial_capacity
> 0) ? initial_capacity
: DEFAULT_CAPACITY
;
193 * Scale up the capacity by the specified initial load factor. (i.e to hold 1000 entries at
194 * 80% load we need a capacity of 1250)
196 capacity
= capacity
* 100 / DEFAULT_LOAD
;
198 result
= allocate_buckets(map
, capacity
);
199 if (result
!= VDO_SUCCESS
) {
200 vdo_int_map_free(vdo_forget(map
));
209 * vdo_int_map_free() - Free an int_map.
210 * @map: The int_map to free.
212 * NOTE: The map does not own the pointer values stored in the map and they are not freed by this
215 void vdo_int_map_free(struct int_map
*map
)
220 vdo_free(vdo_forget(map
->buckets
));
221 vdo_free(vdo_forget(map
));
225 * vdo_int_map_size() - Get the number of entries stored in an int_map.
226 * @map: The int_map to query.
228 * Return: The number of entries in the map.
230 size_t vdo_int_map_size(const struct int_map
*map
)
236 * dereference_hop() - Convert a biased hop offset within a neighborhood to a pointer to the bucket
238 * @neighborhood: The first bucket in the neighborhood.
239 * @hop_offset: The biased hop offset to the desired bucket.
241 * Return: NULL if hop_offset is zero, otherwise a pointer to the bucket in the neighborhood at
244 static struct bucket
*dereference_hop(struct bucket
*neighborhood
, unsigned int hop_offset
)
246 BUILD_BUG_ON(NULL_HOP_OFFSET
!= 0);
247 if (hop_offset
== NULL_HOP_OFFSET
)
250 return &neighborhood
[hop_offset
- 1];
254 * insert_in_hop_list() - Add a bucket into the hop list for the neighborhood.
255 * @neighborhood: The first bucket in the neighborhood.
256 * @new_bucket: The bucket to add to the hop list.
258 * The bucket is inserted it into the list so the hop list remains sorted by hop offset.
260 static void insert_in_hop_list(struct bucket
*neighborhood
, struct bucket
*new_bucket
)
262 /* Zero indicates a NULL hop offset, so bias the hop offset by one. */
263 int hop_offset
= 1 + (new_bucket
- neighborhood
);
265 /* Handle the special case of adding a bucket at the start of the list. */
266 int next_hop
= neighborhood
->first_hop
;
268 if ((next_hop
== NULL_HOP_OFFSET
) || (next_hop
> hop_offset
)) {
269 new_bucket
->next_hop
= next_hop
;
270 neighborhood
->first_hop
= hop_offset
;
274 /* Search the hop list for the insertion point that maintains the sort order. */
276 struct bucket
*bucket
= dereference_hop(neighborhood
, next_hop
);
278 next_hop
= bucket
->next_hop
;
280 if ((next_hop
== NULL_HOP_OFFSET
) || (next_hop
> hop_offset
)) {
281 new_bucket
->next_hop
= next_hop
;
282 bucket
->next_hop
= hop_offset
;
289 * select_bucket() - Select and return the hash bucket for a given search key.
290 * @map: The map to search.
291 * @key: The mapping key.
293 static struct bucket
*select_bucket(const struct int_map
*map
, u64 key
)
296 * Calculate a good hash value for the provided key. We want exactly 32 bits, so mask the
299 u64 hash
= hash_key(key
) & 0xFFFFFFFF;
302 * Scale the 32-bit hash to a bucket index by treating it as a binary fraction and
303 * multiplying that by the capacity. If the hash is uniformly distributed over [0 ..
304 * 2^32-1], then (hash * capacity / 2^32) should be uniformly distributed over [0 ..
305 * capacity-1]. The multiply and shift is much faster than a divide (modulus) on X86 CPUs.
307 return &map
->buckets
[(hash
* map
->capacity
) >> 32];
311 * search_hop_list() - Search the hop list associated with given hash bucket for a given search
313 * @bucket: The map bucket to search for the key.
314 * @key: The mapping key.
315 * @previous_ptr: Output. if not NULL, a pointer in which to store the bucket in the list preceding
316 * the one that had the matching key
318 * If the key is found, returns a pointer to the entry (bucket or collision), otherwise returns
321 * Return: An entry that matches the key, or NULL if not found.
323 static struct bucket
*search_hop_list(struct bucket
*bucket
, u64 key
,
324 struct bucket
**previous_ptr
)
326 struct bucket
*previous
= NULL
;
327 unsigned int next_hop
= bucket
->first_hop
;
329 while (next_hop
!= NULL_HOP_OFFSET
) {
331 * Check the neighboring bucket indexed by the offset for the
334 struct bucket
*entry
= dereference_hop(bucket
, next_hop
);
336 if ((key
== entry
->key
) && (entry
->value
!= NULL
)) {
337 if (previous_ptr
!= NULL
)
338 *previous_ptr
= previous
;
341 next_hop
= entry
->next_hop
;
349 * vdo_int_map_get() - Retrieve the value associated with a given key from the int_map.
350 * @map: The int_map to query.
351 * @key: The key to look up.
353 * Return: The value associated with the given key, or NULL if the key is not mapped to any value.
355 void *vdo_int_map_get(struct int_map
*map
, u64 key
)
357 struct bucket
*match
= search_hop_list(select_bucket(map
, key
), key
, NULL
);
359 return ((match
!= NULL
) ? match
->value
: NULL
);
363 * resize_buckets() - Increase the number of hash buckets.
364 * @map: The map to resize.
366 * Resizes and rehashes all the existing entries, storing them in the new buckets.
368 * Return: VDO_SUCCESS or an error code.
370 static int resize_buckets(struct int_map
*map
)
375 /* Copy the top-level map data to the stack. */
376 struct int_map old_map
= *map
;
378 /* Re-initialize the map to be empty and 50% larger. */
379 size_t new_capacity
= map
->capacity
/ 2 * 3;
381 vdo_log_info("%s: attempting resize from %zu to %zu, current size=%zu",
382 __func__
, map
->capacity
, new_capacity
, map
->size
);
383 result
= allocate_buckets(map
, new_capacity
);
384 if (result
!= VDO_SUCCESS
) {
389 /* Populate the new hash table from the entries in the old bucket array. */
390 for (i
= 0; i
< old_map
.bucket_count
; i
++) {
391 struct bucket
*entry
= &old_map
.buckets
[i
];
393 if (entry
->value
== NULL
)
396 result
= vdo_int_map_put(map
, entry
->key
, entry
->value
, true, NULL
);
397 if (result
!= VDO_SUCCESS
) {
398 /* Destroy the new partial map and restore the map from the stack. */
399 vdo_free(vdo_forget(map
->buckets
));
405 /* Destroy the old bucket array. */
406 vdo_free(vdo_forget(old_map
.buckets
));
411 * find_empty_bucket() - Probe the bucket array starting at the given bucket for the next empty
412 * bucket, returning a pointer to it.
413 * @map: The map containing the buckets to search.
414 * @bucket: The bucket at which to start probing.
415 * @max_probes: The maximum number of buckets to search.
417 * NULL will be returned if the search reaches the end of the bucket array or if the number of
418 * linear probes exceeds a specified limit.
420 * Return: The next empty bucket, or NULL if the search failed.
422 static struct bucket
*
423 find_empty_bucket(struct int_map
*map
, struct bucket
*bucket
, unsigned int max_probes
)
426 * Limit the search to either the nearer of the end of the bucket array or a fixed distance
427 * beyond the initial bucket.
429 ptrdiff_t remaining
= &map
->buckets
[map
->bucket_count
] - bucket
;
430 struct bucket
*sentinel
= &bucket
[min_t(ptrdiff_t, remaining
, max_probes
)];
431 struct bucket
*entry
;
433 for (entry
= bucket
; entry
< sentinel
; entry
++) {
434 if (entry
->value
== NULL
)
442 * move_empty_bucket() - Move an empty bucket closer to the start of the bucket array.
443 * @hole: The empty bucket to fill with an entry that precedes it in one of its enclosing
446 * This searches the neighborhoods that contain the empty bucket for a non-empty bucket closer to
447 * the start of the array. If such a bucket is found, this swaps the two buckets by moving the
448 * entry to the empty bucket.
450 * Return: The bucket that was vacated by moving its entry to the provided hole, or NULL if no
451 * entry could be moved.
453 static struct bucket
*move_empty_bucket(struct bucket
*hole
)
456 * Examine every neighborhood that the empty bucket is part of, starting with the one in
457 * which it is the last bucket. No boundary check is needed for the negative array
458 * arithmetic since this function is only called when hole is at least NEIGHBORHOOD cells
459 * deeper into the array than a valid bucket.
461 struct bucket
*bucket
;
463 for (bucket
= &hole
[1 - NEIGHBORHOOD
]; bucket
< hole
; bucket
++) {
465 * Find the entry that is nearest to the bucket, which means it will be nearest to
466 * the hash bucket whose neighborhood is full.
468 struct bucket
*new_hole
= dereference_hop(bucket
, bucket
->first_hop
);
470 if (new_hole
== NULL
) {
472 * There are no buckets in this neighborhood that are in use by this one
473 * (they must all be owned by overlapping neighborhoods).
479 * Skip this bucket if its first entry is actually further away than the hole that
480 * we're already trying to fill.
486 * We've found an entry in this neighborhood that we can "hop" further away, moving
487 * the hole closer to the hash bucket, if not all the way into its neighborhood.
491 * The entry that will be the new hole is the first bucket in the list, so setting
492 * first_hop is all that's needed remove it from the list.
494 bucket
->first_hop
= new_hole
->next_hop
;
495 new_hole
->next_hop
= NULL_HOP_OFFSET
;
497 /* Move the entry into the original hole. */
498 hole
->key
= new_hole
->key
;
499 hole
->value
= new_hole
->value
;
500 new_hole
->value
= NULL
;
502 /* Insert the filled hole into the hop list for the neighborhood. */
503 insert_in_hop_list(bucket
, hole
);
507 /* We couldn't find an entry to relocate to the hole. */
512 * update_mapping() - Find and update any existing mapping for a given key, returning the value
513 * associated with the key in the provided pointer.
514 * @neighborhood: The first bucket in the neighborhood that would contain the search key
515 * @key: The key with which to associate the new value.
516 * @new_value: The value to be associated with the key.
517 * @update: Whether to overwrite an existing value.
518 * @old_value_ptr: a pointer in which to store the old value (unmodified if no mapping was found)
520 * Return: true if the map contains a mapping for the key, false if it does not.
522 static bool update_mapping(struct bucket
*neighborhood
, u64 key
, void *new_value
,
523 bool update
, void **old_value_ptr
)
525 struct bucket
*bucket
= search_hop_list(neighborhood
, key
, NULL
);
527 if (bucket
== NULL
) {
528 /* There is no bucket containing the key in the neighborhood. */
533 * Return the value of the current mapping (if desired) and update the mapping with the new
534 * value (if desired).
536 if (old_value_ptr
!= NULL
)
537 *old_value_ptr
= bucket
->value
;
539 bucket
->value
= new_value
;
544 * find_or_make_vacancy() - Find an empty bucket.
545 * @map: The int_map to search or modify.
546 * @neighborhood: The first bucket in the neighborhood in which an empty bucket is needed for a new
549 * Find an empty bucket in a specified neighborhood for a new mapping or attempt to re-arrange
550 * mappings so there is such a bucket. This operation may fail (returning NULL) if an empty bucket
551 * is not available or could not be relocated to the neighborhood.
553 * Return: a pointer to an empty bucket in the desired neighborhood, or NULL if a vacancy could not
554 * be found or arranged.
556 static struct bucket
*find_or_make_vacancy(struct int_map
*map
,
557 struct bucket
*neighborhood
)
559 /* Probe within and beyond the neighborhood for the first empty bucket. */
560 struct bucket
*hole
= find_empty_bucket(map
, neighborhood
, MAX_PROBES
);
563 * Keep trying until the empty bucket is in the bucket's neighborhood or we are unable to
564 * move it any closer by swapping it with a filled bucket.
566 while (hole
!= NULL
) {
567 int distance
= hole
- neighborhood
;
569 if (distance
< NEIGHBORHOOD
) {
571 * We've found or relocated an empty bucket close enough to the initial
572 * hash bucket to be referenced by its hop vector.
578 * The nearest empty bucket isn't within the neighborhood that must contain the new
579 * entry, so try to swap it with bucket that is closer.
581 hole
= move_empty_bucket(hole
);
588 * vdo_int_map_put() - Try to associate a value with an integer.
589 * @map: The int_map to attempt to modify.
590 * @key: The key with which to associate the new value.
591 * @new_value: The value to be associated with the key.
592 * @update: Whether to overwrite an existing value.
593 * @old_value_ptr: A pointer in which to store either the old value (if the key was already mapped)
594 * or NULL if the map did not contain the key; NULL may be provided if the caller
595 * does not need to know the old value
597 * Try to associate a value (a pointer) with an integer in an int_map. If the map already contains
598 * a mapping for the provided key, the old value is only replaced with the specified value if
599 * update is true. In either case the old value is returned. If the map does not already contain a
600 * value for the specified key, the new value is added regardless of the value of update.
602 * Return: VDO_SUCCESS or an error code.
604 int vdo_int_map_put(struct int_map
*map
, u64 key
, void *new_value
, bool update
,
605 void **old_value_ptr
)
607 struct bucket
*neighborhood
, *bucket
;
609 if (unlikely(new_value
== NULL
))
613 * Select the bucket at the start of the neighborhood that must contain any entry for the
616 neighborhood
= select_bucket(map
, key
);
619 * Check whether the neighborhood already contains an entry for the key, in which case we
620 * optionally update it, returning the old value.
622 if (update_mapping(neighborhood
, key
, new_value
, update
, old_value_ptr
))
626 * Find an empty bucket in the desired neighborhood for the new entry or re-arrange entries
627 * in the map so there is such a bucket. This operation will usually succeed; the loop body
628 * will only be executed on the rare occasions that we have to resize the map.
630 while ((bucket
= find_or_make_vacancy(map
, neighborhood
)) == NULL
) {
634 * There is no empty bucket in which to put the new entry in the current map, so
635 * we're forced to allocate a new bucket array with a larger capacity, re-hash all
636 * the entries into those buckets, and try again (a very expensive operation for
639 result
= resize_buckets(map
);
640 if (result
!= VDO_SUCCESS
)
644 * Resizing the map invalidates all pointers to buckets, so recalculate the
645 * neighborhood pointer.
647 neighborhood
= select_bucket(map
, key
);
650 /* Put the new entry in the empty bucket, adding it to the neighborhood. */
652 bucket
->value
= new_value
;
653 insert_in_hop_list(neighborhood
, bucket
);
656 /* There was no existing entry, so there was no old value to be returned. */
657 if (old_value_ptr
!= NULL
)
658 *old_value_ptr
= NULL
;
663 * vdo_int_map_remove() - Remove the mapping for a given key from the int_map.
664 * @map: The int_map from which to remove the mapping.
665 * @key: The key whose mapping is to be removed.
667 * Return: the value that was associated with the key, or NULL if it was not mapped.
669 void *vdo_int_map_remove(struct int_map
*map
, u64 key
)
673 /* Select the bucket to search and search it for an existing entry. */
674 struct bucket
*bucket
= select_bucket(map
, key
);
675 struct bucket
*previous
;
676 struct bucket
*victim
= search_hop_list(bucket
, key
, &previous
);
678 if (victim
== NULL
) {
679 /* There is no matching entry to remove. */
684 * We found an entry to remove. Save the mapped value to return later and empty the bucket.
687 value
= victim
->value
;
688 victim
->value
= NULL
;
691 /* The victim bucket is now empty, but it still needs to be spliced out of the hop list. */
692 if (previous
== NULL
) {
693 /* The victim is the head of the list, so swing first_hop. */
694 bucket
->first_hop
= victim
->next_hop
;
696 previous
->next_hop
= victim
->next_hop
;
699 victim
->next_hop
= NULL_HOP_OFFSET
;