2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \libinternal \file
37 * Defines structures and functions for mapping from keys to entries
38 * indices using a hash table.
39 * The functions are performance critical and should be inlined.
42 * \ingroup module_domdec
44 * \author Berk Hess <hess@kth.se>
47 #ifndef GMX_DOMDEC_HASHEDMAP_H
48 #define GMX_DOMDEC_HASHEDMAP_H
56 #include "gromacs/compat/utility.h"
57 #include "gromacs/utility/basedefinitions.h"
58 #include "gromacs/utility/exceptions.h"
63 /*! \libinternal \brief Unordered key to value mapping
65 * Efficiently manages mapping from integer keys to values.
66 * Note that this basically implements a subset of the functionality of
67 * std::unordered_map, but is an order of magnitude faster.
73 /*! \libinternal \brief Structure for the key/value hash table */
76 int key
= -1; /**< The key */
77 T value
; /**< The value(s) */
78 int next
= -1; /**< Index in the list of the next element with the same hash, -1 if none */
81 /*! \brief The table size is set to at least this factor time the nr of keys */
82 static constexpr float c_relTableSizeSetMin
= 1.5;
83 /*! \brief Threshold for increasing the table size */
84 static constexpr float c_relTableSizeThresholdMin
= 1.3;
85 /*! \brief Threshold for decreasing the table size */
86 static constexpr float c_relTableSizeThresholdMax
= 3.5;
88 /*! \brief Resizes the table
90 * \param[in] numElementsEstimate An estimate of the number of elements that will be stored
92 void resize(int numElementsEstimate
)
94 GMX_RELEASE_ASSERT(numElements_
== 0, "Table needs to be empty for resize");
96 /* The fraction of table entries with 0 size lists is e^-f.
97 * The fraction of table entries with >=1 size lists is 1 - e^-f
98 * where f is: the #elements / tableSize
99 * The fraction of elements not in the direct list is: 1 - (1 - e^-f)/f.
100 * Thus the optimal table size is roughly double #elements.
102 /* Make the hash table a power of 2 and at least 1.5 * #elements */
104 while (tableSize
<= INT_MAX
/ 2
105 && static_cast<float>(numElementsEstimate
) * c_relTableSizeSetMin
> tableSize
)
109 table_
.resize(tableSize
);
111 /* Table size is a power of 2, so a binary mask gives the hash */
112 bitMask_
= tableSize
- 1;
113 startIndexForSpaceForListEntry_
= tableSize
;
117 /*! \brief Constructor
119 * \param[in] numElementsEstimate An estimate of the number of elements that will be stored, used for optimizing initial performance
121 * Note that the estimate of the number of elements is only relevant
122 * for the performance up until the first call to clear(), after which
123 * table size is optimized based on the actual number of elements.
125 HashedMap(int numElementsEstimate
) { resize(numElementsEstimate
); }
127 /*! \brief Returns the number of elements */
128 int size() const { return numElements_
; }
130 /*! \brief Returns the number of buckets, i.e. the number of possible hashes */
131 int bucket_count() const { return bitMask_
+ 1; }
134 /*! \brief Inserts or assigns a key and value
136 * \tparam allowAssign Sets whether assignment of a key that is present is allowed
137 * \param[in] key The key for the entry
138 * \param[in] value The value for the entry
139 * \throws InvalidInputError from a debug build when attempting to insert a duplicate key with \p allowAssign=true
141 // cppcheck-suppress unusedPrivateFunction
142 template<bool allowAssign
>
143 void insert_assign(int key
, const T
& value
)
145 size_t ind
= (key
& bitMask_
);
147 if (table_
[ind
].key
>= 0)
149 /* Loop over the entries for this hash.
150 * If we find the matching key, return the value.
153 if (table_
[ind_prev
].key
== key
)
157 table_
[ind
].value
= value
;
162 // Note: This is performance critical, so we only throw in debug mode
164 GMX_THROW(InvalidInputError("Attempt to insert duplicate key"));
168 while (table_
[ind_prev
].next
>= 0)
170 ind_prev
= table_
[ind_prev
].next
;
171 if (table_
[ind_prev
].key
== key
)
175 table_
[ind_prev
].value
= value
;
181 GMX_THROW(InvalidInputError("Attempt to insert duplicate key"));
186 /* Search for space in table_ */
187 ind
= startIndexForSpaceForListEntry_
;
188 while (ind
< table_
.size() && table_
[ind
].key
>= 0)
192 /* If we are at the end of the list we need to increase the size */
193 if (ind
== table_
.size())
195 table_
.resize(table_
.size() + 1);
197 table_
[ind_prev
].next
= ind
;
199 startIndexForSpaceForListEntry_
= ind
+ 1;
202 table_
[ind
].key
= key
;
203 table_
[ind
].value
= value
;
209 /*! \brief Inserts entry, key should not already be present
211 * \param[in] key The key for the entry
212 * \param[in] value The value for the entry
213 * \throws InvalidInputError from a debug build when attempting to inser */
214 void insert(int key
, const T
& value
) { insert_assign
<false>(key
, value
); }
216 /*! \brief Inserts an entry when the key is not present, otherwise sets the value
218 * \param[in] key The key for the entry
219 * \param[in] value The value for the entry
221 void insert_or_assign(int key
, const T
& value
) { insert_assign
<true>(key
, value
); }
223 /*! \brief Delete the entry for key \p key, when present
225 * \param[in] key The key
230 int ind
= (key
& bitMask_
);
233 if (table_
[ind
].key
== key
)
237 table_
[ind_prev
].next
= table_
[ind
].next
;
239 /* This index is a linked entry, so we free an entry.
240 * Check if we are creating the first empty space.
242 if (ind
< startIndexForSpaceForListEntry_
)
244 startIndexForSpaceForListEntry_
= ind
;
247 table_
[ind
].key
= -1;
248 table_
[ind
].next
= -1;
255 ind
= table_
[ind
].next
;
259 /*! \brief Returns a pointer to the value for the given key or nullptr when not present
261 * \todo Use std::as_const when CUDA 11 is a requirement.
263 * \param[in] key The key
264 * \return a pointer to value for the given key or nullptr when not present
266 T
* find(int key
) { return const_cast<T
*>(gmx::compat::as_const(*this).find(key
)); }
268 /*! \brief Returns a pointer to the value for the given key or nullptr when not present
270 * \param[in] key The key
271 * \return a pointer to value for the given key or nullptr when not present
273 const T
* find(int key
) const
275 int ind
= (key
& bitMask_
);
278 if (table_
[ind
].key
== key
)
280 return &table_
[ind
].value
;
282 ind
= table_
[ind
].next
;
288 /*! \brief Clear all the entries in the list
290 * Also optimizes the size of the table based on the current
291 * number of elements stored.
295 const int oldNumElements
= numElements_
;
297 for (hashEntry
& entry
: table_
)
302 startIndexForSpaceForListEntry_
= bucket_count();
305 /* Resize the hash table when the occupation is far from optimal.
306 * Do not resize with 0 elements to avoid minimal size when clear()
307 * is called twice in a row.
309 if (oldNumElements
> 0
310 && (oldNumElements
* c_relTableSizeThresholdMax
< bucket_count()
311 || oldNumElements
* c_relTableSizeThresholdMin
> bucket_count()))
313 resize(oldNumElements
);
318 /*! \brief The hash table list */
319 std::vector
<hashEntry
> table_
;
320 /*! \brief The bit mask for computing the hash of a key */
322 /*! \brief Index in table_ at which to start looking for empty space for a new linked list entry */
323 int startIndexForSpaceForListEntry_
= 0;
324 /*! \brief The number of elements currently stored in the table */
325 int numElements_
= 0;