Cleanup: Subdiv: Remove common_ prefix
[blender.git] / source / blender / blenlib / BLI_probing_strategies.hh
blob4849c3180388d2b854fb25417b88e86c54e7ae45
1 /* SPDX-FileCopyrightText: 2023 Blender Authors
3 * SPDX-License-Identifier: GPL-2.0-or-later */
5 #pragma once
7 /** \file
8 * \ingroup bli
10 * This file implements different probing strategies. Those can be used by different hash table
11 * implementations like blender::Set and blender::Map. A probing strategy produces a sequence of
12 * values based on an initial hash value.
14 * A probing strategy has to implement the following methods:
15 * - Constructor(uint64_t hash): Start a new probing sequence based on the given hash.
16 * - get() const -> uint64_t: Get the current value in the sequence.
17 * - next() -> void: Update the internal state, so that the next value can be accessed with get().
18 * - linear_steps() -> int64_t: Returns number of linear probing steps that should be done.
20 * Using linear probing steps between larger jumps can result in better performance, due to
21 * improved cache usage. It's a way of getting the benefits or linear probing without the
22 * clustering issues. However, more linear steps can also make things slower when the initial hash
23 * produces many collisions.
25 * Every probing strategy has to guarantee that every possible uint64_t is returned eventually.
26 * This is necessary for correctness. If this is not the case, empty slots might not be found.
28 * The SLOT_PROBING_BEGIN and SLOT_PROBING_END macros can be used to implement a loop that iterates
29 * over a probing sequence.
31 * Probing strategies can be evaluated with many different criteria. Different use cases often
32 * have different optimal strategies. Examples:
33 * - If the hash function generates a well distributed initial hash value, the constructor should
34 * be as short as possible. This is because the hash value can be used as slot index almost
35 * immediately, without too many collisions. This is also a perfect use case for linear steps.
36 * - If the hash function is bad, it can help if the probing strategy remixes the hash value,
37 * before the first slot is accessed.
38 * - Different next() methods can remix the hash value in different ways. Depending on which bits
39 * of the hash value contain the most information, different rehashing strategies work best.
40 * - When the hash table is very small, having a trivial hash function and then doing linear
41 * probing might work best.
44 #include <limits>
46 #include "BLI_sys_types.h"
48 namespace blender {
50 /**
51 * The simplest probing strategy. It's bad in most cases, because it produces clusters in the hash
52 * table, which result in many collisions. However, if the hash function is very good or the hash
53 * table is small, this strategy might even work best.
55 class LinearProbingStrategy {
56 private:
57 uint64_t hash_;
59 public:
60 LinearProbingStrategy(const uint64_t hash) : hash_(hash) {}
62 void next()
64 hash_++;
67 uint64_t get() const
69 return hash_;
72 int64_t linear_steps() const
74 return std::numeric_limits<int64_t>::max();
78 /**
79 * A slightly adapted quadratic probing strategy. The distance to the original slot increases
80 * quadratically. This method also leads to clustering. Another disadvantage is that not all bits
81 * of the original hash are used.
83 * The distance i * i is not used, because it does not guarantee, that every slot is hit.
84 * Instead (i * i + i) / 2 is used, which has this desired property.
86 * In the first few steps, this strategy can have good cache performance. It largely depends on how
87 * many keys fit into a cache line in the hash table.
89 class QuadraticProbingStrategy {
90 private:
91 uint64_t original_hash_;
92 uint64_t current_hash_;
93 uint64_t iteration_ = 1;
95 public:
96 QuadraticProbingStrategy(const uint64_t hash) : original_hash_(hash), current_hash_(hash) {}
98 void next()
100 current_hash_ = original_hash_ + ((iteration_ * iteration_ + iteration_) >> 1);
101 iteration_++;
104 uint64_t get() const
106 return current_hash_;
109 int64_t linear_steps() const
111 return 1;
116 * This is the probing strategy used by CPython (in 2020).
118 * It is very fast when the original hash value is good. If there are collisions, more bits of the
119 * hash value are taken into account.
121 * LinearSteps: Can be set to something larger than 1 for improved cache performance in some cases.
122 * PreShuffle: When true, the initial call to next() will be done to the constructor. This can help
123 * when the hash function has put little information into the lower bits.
125 template<uint64_t LinearSteps = 1, bool PreShuffle = false> class PythonProbingStrategy {
126 private:
127 uint64_t hash_;
128 uint64_t perturb_;
130 public:
131 PythonProbingStrategy(const uint64_t hash) : hash_(hash), perturb_(hash)
133 if (PreShuffle) {
134 this->next();
138 void next()
140 perturb_ >>= 5;
141 hash_ = 5 * hash_ + 1 + perturb_;
144 uint64_t get() const
146 return hash_;
149 int64_t linear_steps() const
151 return LinearSteps;
156 * Similar to the Python probing strategy. However, it does a bit more shuffling in the next()
157 * method. This way more bits are taken into account earlier. After a couple of collisions (that
158 * should happen rarely), it will fallback to a sequence that hits every slot.
160 template<uint64_t LinearSteps = 2, bool PreShuffle = false> class ShuffleProbingStrategy {
161 private:
162 uint64_t hash_;
163 uint64_t perturb_;
165 public:
166 ShuffleProbingStrategy(const uint64_t hash) : hash_(hash), perturb_(hash)
168 if (PreShuffle) {
169 this->next();
173 void next()
175 if (perturb_ != 0) {
176 perturb_ >>= 10;
177 hash_ = ((hash_ >> 16) ^ hash_) * 0x45d9f3b + perturb_;
179 else {
180 hash_ = 5 * hash_ + 1;
184 uint64_t get() const
186 return hash_;
189 int64_t linear_steps() const
191 return LinearSteps;
196 * Having a specified default is convenient.
198 using DefaultProbingStrategy = PythonProbingStrategy<>;
200 /* Turning off clang format here, because otherwise it will mess up the alignment between the
201 * macros. */
202 // clang-format off
205 * Both macros together form a loop that iterates over slot indices in a hash table with a
206 * power-of-two size.
208 * You must not `break` out of this loop. Only `return` is permitted. If you don't return
209 * out of the loop, it will be an infinite loop. These loops should not be nested within the
210 * same function.
212 * PROBING_STRATEGY: Class describing the probing strategy.
213 * HASH: The initial hash as produced by a hash function.
214 * MASK: A bit mask such that (hash & MASK) is a valid slot index.
215 * R_SLOT_INDEX: Name of the variable that will contain the slot index.
217 #define SLOT_PROBING_BEGIN(PROBING_STRATEGY, HASH, MASK, R_SLOT_INDEX) \
218 PROBING_STRATEGY probing_strategy(HASH); \
219 do { \
220 int64_t linear_offset = 0; \
221 uint64_t current_hash = probing_strategy.get(); \
222 do { \
223 int64_t R_SLOT_INDEX = int64_t((current_hash + uint64_t(linear_offset)) & MASK);
225 #define SLOT_PROBING_END() \
226 } while (++linear_offset < probing_strategy.linear_steps()); \
227 probing_strategy.next(); \
228 } while (true)
230 // clang-format on
232 } // namespace blender