source/blender/blenlib/BLI_probing_strategies.hh

   1 /* SPDX-FileCopyrightText: 2023 Blender Authors
   2  *
   3  * SPDX-License-Identifier: GPL-2.0-or-later */
   4
   5 #pragma once
   6
   7 /** \file
   8  * \ingroup bli
   9  *
  10  * This file implements different probing strategies. Those can be used by different hash table
  11  * implementations like blender::Set and blender::Map. A probing strategy produces a sequence of
  12  * values based on an initial hash value.
  13  *
  14  * A probing strategy has to implement the following methods:
  15  * - Constructor(uint64_t hash): Start a new probing sequence based on the given hash.
  16  * - get() const -> uint64_t: Get the current value in the sequence.
  17  * - next() -> void: Update the internal state, so that the next value can be accessed with get().
  18  * - linear_steps() -> int64_t: Returns number of linear probing steps that should be done.
  19  *
  20  * Using linear probing steps between larger jumps can result in better performance, due to
  21  * improved cache usage. It's a way of getting the benefits or linear probing without the
  22  * clustering issues. However, more linear steps can also make things slower when the initial hash
  23  * produces many collisions.
  24  *
  25  * Every probing strategy has to guarantee that every possible uint64_t is returned eventually.
  26  * This is necessary for correctness. If this is not the case, empty slots might not be found.
  27  *
  28  * The SLOT_PROBING_BEGIN and SLOT_PROBING_END macros can be used to implement a loop that iterates
  29  * over a probing sequence.
  30  *
  31  * Probing strategies can be evaluated with many different criteria. Different use cases often
  32  * have different optimal strategies. Examples:
  33  * - If the hash function generates a well distributed initial hash value, the constructor should
  34  *   be as short as possible. This is because the hash value can be used as slot index almost
  35  *   immediately, without too many collisions. This is also a perfect use case for linear steps.
  36  * - If the hash function is bad, it can help if the probing strategy remixes the hash value,
  37  *   before the first slot is accessed.
  38  * - Different next() methods can remix the hash value in different ways. Depending on which bits
  39  *   of the hash value contain the most information, different rehashing strategies work best.
  40  * - When the hash table is very small, having a trivial hash function and then doing linear
  41  *   probing might work best.
  42  */
  43
  44 #include <limits>
  45
  46 #include "BLI_sys_types.h"
  47
  48 namespace blender {
  49
  50 /**
  51  * The simplest probing strategy. It's bad in most cases, because it produces clusters in the hash
  52  * table, which result in many collisions. However, if the hash function is very good or the hash
  53  * table is small, this strategy might even work best.
  54  */
  55 class LinearProbingStrategy {
  56  private:
  57   uint64_t hash_;
  58
  59  public:
  60   LinearProbingStrategy(const uint64_t hash) : hash_(hash) {}
  61
  62   void next()
  63   {
  64     hash_++;
  65   }
  66
  67   uint64_t get() const
  68   {
  69     return hash_;
  70   }
  71
  72   int64_t linear_steps() const
  73   {
  74     return std::numeric_limits<int64_t>::max();
  75   }
  76 };
  77
  78 /**
  79  * A slightly adapted quadratic probing strategy. The distance to the original slot increases
  80  * quadratically. This method also leads to clustering. Another disadvantage is that not all bits
  81  * of the original hash are used.
  82  *
  83  * The distance i * i is not used, because it does not guarantee, that every slot is hit.
  84  * Instead (i * i + i) / 2 is used, which has this desired property.
  85  *
  86  * In the first few steps, this strategy can have good cache performance. It largely depends on how
  87  * many keys fit into a cache line in the hash table.
  88  */
  89 class QuadraticProbingStrategy {
  90  private:
  91   uint64_t original_hash_;
  92   uint64_t current_hash_;
  93   uint64_t iteration_ = 1;
  94
  95  public:
  96   QuadraticProbingStrategy(const uint64_t hash) : original_hash_(hash), current_hash_(hash) {}
  97
  98   void next()
  99   {
 100     current_hash_ = original_hash_ + ((iteration_ * iteration_ + iteration_) >> 1);
 101     iteration_++;
 102   }
 103
 104   uint64_t get() const
 105   {
 106     return current_hash_;
 107   }
 108
 109   int64_t linear_steps() const
 110   {
 111     return 1;
 112   }
 113 };
 114
 115 /**
 116  * This is the probing strategy used by CPython (in 2020).
 117  *
 118  * It is very fast when the original hash value is good. If there are collisions, more bits of the
 119  * hash value are taken into account.
 120  *
 121  * LinearSteps: Can be set to something larger than 1 for improved cache performance in some cases.
 122  * PreShuffle: When true, the initial call to next() will be done to the constructor. This can help
 123  *   when the hash function has put little information into the lower bits.
 124  */
 125 template<uint64_t LinearSteps = 1, bool PreShuffle = false> class PythonProbingStrategy {
 126  private:
 127   uint64_t hash_;
 128   uint64_t perturb_;
 129
 130  public:
 131   PythonProbingStrategy(const uint64_t hash) : hash_(hash), perturb_(hash)
 132   {
 133     if (PreShuffle) {
 134       this->next();
 135     }
 136   }
 137
 138   void next()
 139   {
 140     perturb_ >>= 5;
 141     hash_ = 5 * hash_ + 1 + perturb_;
 142   }
 143
 144   uint64_t get() const
 145   {
 146     return hash_;
 147   }
 148
 149   int64_t linear_steps() const
 150   {
 151     return LinearSteps;
 152   }
 153 };
 154
 155 /**
 156  * Similar to the Python probing strategy. However, it does a bit more shuffling in the next()
 157  * method. This way more bits are taken into account earlier. After a couple of collisions (that
 158  * should happen rarely), it will fallback to a sequence that hits every slot.
 159  */
 160 template<uint64_t LinearSteps = 2, bool PreShuffle = false> class ShuffleProbingStrategy {
 161  private:
 162   uint64_t hash_;
 163   uint64_t perturb_;
 164
 165  public:
 166   ShuffleProbingStrategy(const uint64_t hash) : hash_(hash), perturb_(hash)
 167   {
 168     if (PreShuffle) {
 169       this->next();
 170     }
 171   }
 172
 173   void next()
 174   {
 175     if (perturb_ != 0) {
 176       perturb_ >>= 10;
 177       hash_ = ((hash_ >> 16) ^ hash_) * 0x45d9f3b + perturb_;
 178     }
 179     else {
 180       hash_ = 5 * hash_ + 1;
 181     }
 182   }
 183
 184   uint64_t get() const
 185   {
 186     return hash_;
 187   }
 188
 189   int64_t linear_steps() const
 190   {
 191     return LinearSteps;
 192   }
 193 };
 194
 195 /**
 196  * Having a specified default is convenient.
 197  */
 198 using DefaultProbingStrategy = PythonProbingStrategy<>;
 199
 200 /* Turning off clang format here, because otherwise it will mess up the alignment between the
 201  * macros. */
 202 // clang-format off
 203
 204 /**
 205  * Both macros together form a loop that iterates over slot indices in a hash table with a
 206  * power-of-two size.
 207  *
 208  * You must not `break` out of this loop. Only `return` is permitted. If you don't return
 209  * out of the loop, it will be an infinite loop. These loops should not be nested within the
 210  * same function.
 211  *
 212  * PROBING_STRATEGY: Class describing the probing strategy.
 213  * HASH: The initial hash as produced by a hash function.
 214  * MASK: A bit mask such that (hash & MASK) is a valid slot index.
 215  * R_SLOT_INDEX: Name of the variable that will contain the slot index.
 216  */
 217 #define SLOT_PROBING_BEGIN(PROBING_STRATEGY, HASH, MASK, R_SLOT_INDEX) \
 218   PROBING_STRATEGY probing_strategy(HASH); \
 219   do { \
 220     int64_t linear_offset = 0; \
 221     uint64_t current_hash = probing_strategy.get(); \
 222     do { \
 223       int64_t R_SLOT_INDEX = int64_t((current_hash + uint64_t(linear_offset)) & MASK);
 224
 225 #define SLOT_PROBING_END() \
 226     } while (++linear_offset < probing_strategy.linear_steps()); \
 227     probing_strategy.next(); \
 228   } while (true)
 229
 230 // clang-format on
 231
 232 }  // namespace blender