Move important information up in -Si output
[pacman-ng.git] / lib / libalpm / pkghash.c
blob7b388004587015ef9be7bb634b5f5088cd483bbc
1 /*
2 * pkghash.c
4 * Copyright (c) 2011-2012 Pacman Development Team <pacman-dev@archlinux.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include <errno.h>
22 #include "pkghash.h"
23 #include "util.h"
25 /* List of primes for possible sizes of hash tables.
27 * The maximum table size is the last prime under 1,000,000. That is
28 * more than an order of magnitude greater than the number of packages
29 * in any Linux distribution, and well under UINT_MAX.
31 static const unsigned int prime_list[] =
33 11u, 13u, 17u, 19u, 23u, 29u, 31u, 37u, 41u, 43u, 47u,
34 53u, 59u, 61u, 67u, 71u, 73u, 79u, 83u, 89u, 97u, 103u,
35 109u, 113u, 127u, 137u, 139u, 149u, 157u, 167u, 179u, 193u,
36 199u, 211u, 227u, 241u, 257u, 277u, 293u, 313u, 337u, 359u,
37 383u, 409u, 439u, 467u, 503u, 541u, 577u, 619u, 661u, 709u,
38 761u, 823u, 887u, 953u, 1031u, 1109u, 1193u, 1289u, 1381u,
39 1493u, 1613u, 1741u, 1879u, 2029u, 2179u, 2357u, 2549u,
40 2753u, 2971u, 3209u, 3469u, 3739u, 4027u, 4349u, 4703u,
41 5087u, 5503u, 5953u, 6427u, 6949u, 7517u, 8123u, 8783u,
42 9497u, 10273u, 11113u, 12011u, 12983u, 14033u, 15173u,
43 16411u, 17749u, 19183u, 20753u, 22447u, 24281u, 26267u,
44 28411u, 30727u, 33223u, 35933u, 38873u, 42043u, 45481u,
45 49201u, 53201u, 57557u, 62233u, 67307u, 72817u, 78779u,
46 85229u, 92203u, 99733u, 107897u, 116731u, 126271u, 136607u,
47 147793u, 159871u, 172933u, 187091u, 202409u, 218971u, 236897u,
48 256279u, 277261u, 299951u, 324503u, 351061u, 379787u, 410857u,
49 444487u, 480881u, 520241u, 562841u, 608903u, 658753u, 712697u,
50 771049u, 834181u, 902483u, 976369u
53 /* How far forward do we look when linear probing for a spot? */
54 static const unsigned int stride = 1;
55 /* What is the maximum load percentage of our hash table? */
56 static const double max_hash_load = 0.68;
57 /* Initial load percentage given a certain size */
58 static const double initial_hash_load = 0.58;
60 /* Allocate a hash table with space for at least "size" elements */
61 alpm_pkghash_t *_alpm_pkghash_create(unsigned int size)
63 alpm_pkghash_t *hash = NULL;
64 unsigned int i, loopsize;
66 CALLOC(hash, 1, sizeof(alpm_pkghash_t), return NULL);
67 size = size / initial_hash_load + 1;
69 loopsize = sizeof(prime_list) / sizeof(*prime_list);
70 for(i = 0; i < loopsize; i++) {
71 if(prime_list[i] > size) {
72 hash->buckets = prime_list[i];
73 hash->limit = hash->buckets * max_hash_load;
74 break;
78 if(hash->buckets < size) {
79 errno = ERANGE;
80 free(hash);
81 return NULL;
84 CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t *), \
85 free(hash); return NULL);
87 return hash;
90 static unsigned int get_hash_position(unsigned long name_hash,
91 alpm_pkghash_t *hash)
93 unsigned int position;
95 position = name_hash % hash->buckets;
97 /* collision resolution using open addressing with linear probing */
98 while(hash->hash_table[position] != NULL) {
99 position += stride;
100 while(position >= hash->buckets) {
101 position -= hash->buckets;
105 return position;
108 /* Expand the hash table size to the next increment and rebin the entries */
109 static alpm_pkghash_t *rehash(alpm_pkghash_t *oldhash)
111 alpm_pkghash_t *newhash;
112 unsigned int newsize, i;
114 /* Hash tables will need resized in two cases:
115 * - adding packages to the local database
116 * - poor estimation of the number of packages in sync database
118 * For small hash tables sizes (<500) the increase in size is by a
119 * minimum of a factor of 2 for optimal rehash efficiency. For
120 * larger database sizes, this increase is reduced to avoid excess
121 * memory allocation as both scenarios requiring a rehash should not
122 * require a table size increase that large. */
123 if(oldhash->buckets < 500) {
124 newsize = oldhash->buckets * 2;
125 } else if(oldhash->buckets < 2000) {
126 newsize = oldhash->buckets * 3 / 2;
127 } else if(oldhash->buckets < 5000) {
128 newsize = oldhash->buckets * 4 / 3;
129 } else {
130 newsize = oldhash->buckets + 1;
133 newhash = _alpm_pkghash_create(newsize);
134 if(newhash == NULL) {
135 /* creation of newhash failed, stick with old one... */
136 return oldhash;
139 newhash->list = oldhash->list;
140 oldhash->list = NULL;
142 for(i = 0; i < oldhash->buckets; i++) {
143 if(oldhash->hash_table[i] != NULL) {
144 alpm_pkg_t *package = oldhash->hash_table[i]->data;
145 unsigned int position = get_hash_position(package->name_hash, newhash);
147 newhash->hash_table[position] = oldhash->hash_table[i];
148 oldhash->hash_table[i] = NULL;
152 newhash->entries = oldhash->entries;
154 _alpm_pkghash_free(oldhash);
156 return newhash;
159 static alpm_pkghash_t *pkghash_add_pkg(alpm_pkghash_t *hash, alpm_pkg_t *pkg,
160 int sorted)
162 alpm_list_t *ptr;
163 unsigned int position;
165 if(pkg == NULL || hash == NULL) {
166 return hash;
169 if(hash->entries >= hash->limit) {
170 hash = rehash(hash);
173 position = get_hash_position(pkg->name_hash, hash);
175 ptr = malloc(sizeof(alpm_list_t));
176 if(ptr == NULL) {
177 return hash;
180 ptr->data = pkg;
181 ptr->prev = ptr;
182 ptr->next = NULL;
184 hash->hash_table[position] = ptr;
185 if(!sorted) {
186 hash->list = alpm_list_join(hash->list, ptr);
187 } else {
188 hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp);
191 hash->entries += 1;
192 return hash;
195 alpm_pkghash_t *_alpm_pkghash_add(alpm_pkghash_t *hash, alpm_pkg_t *pkg)
197 return pkghash_add_pkg(hash, pkg, 0);
200 alpm_pkghash_t *_alpm_pkghash_add_sorted(alpm_pkghash_t *hash, alpm_pkg_t *pkg)
202 return pkghash_add_pkg(hash, pkg, 1);
205 static unsigned int move_one_entry(alpm_pkghash_t *hash,
206 unsigned int start, unsigned int end)
208 /* Iterate backwards from 'end' to 'start', seeing if any of the items
209 * would hash to 'start'. If we find one, we move it there and break. If
210 * we get all the way back to position and find none that hash to it, we
211 * also end iteration. Iterating backwards helps prevent needless shuffles;
212 * we will never need to move more than one item per function call. The
213 * return value is our current iteration location; if this is equal to
214 * 'start' we can stop this madness. */
215 while(end != start) {
216 alpm_list_t *i = hash->hash_table[end];
217 alpm_pkg_t *info = i->data;
218 unsigned int new_position = get_hash_position(info->name_hash, hash);
220 if(new_position == start) {
221 hash->hash_table[start] = i;
222 hash->hash_table[end] = NULL;
223 break;
226 /* the odd math ensures we are always positive, e.g.
227 * e.g. (0 - 1) % 47 == -1
228 * e.g. (47 + 0 - 1) % 47 == 46 */
229 end = (hash->buckets + end - stride) % hash->buckets;
231 return end;
235 * @brief Remove a package from a pkghash.
237 * @param hash the hash to remove the package from
238 * @param pkg the package we are removing
239 * @param data output parameter containing the removed item
241 * @return the resultant hash
243 alpm_pkghash_t *_alpm_pkghash_remove(alpm_pkghash_t *hash, alpm_pkg_t *pkg,
244 alpm_pkg_t **data)
246 alpm_list_t *i;
247 unsigned int position;
249 if(data) {
250 *data = NULL;
253 if(pkg == NULL || hash == NULL) {
254 return hash;
257 position = pkg->name_hash % hash->buckets;
258 while((i = hash->hash_table[position]) != NULL) {
259 alpm_pkg_t *info = i->data;
261 if(info->name_hash == pkg->name_hash &&
262 strcmp(info->name, pkg->name) == 0) {
263 unsigned int stop, prev;
265 /* remove from list and hash */
266 hash->list = alpm_list_remove_item(hash->list, i);
267 if(data) {
268 *data = info;
270 hash->hash_table[position] = NULL;
271 free(i);
272 hash->entries -= 1;
274 /* Potentially move entries following removed entry to keep open
275 * addressing collision resolution working. We start by finding the
276 * next null bucket to know how far we have to look. */
277 stop = position + stride;
278 while(stop >= hash->buckets) {
279 stop -= hash->buckets;
281 while(hash->hash_table[stop] != NULL && stop != position) {
282 stop += stride;
283 while(stop >= hash->buckets) {
284 stop -= hash->buckets;
287 stop = (hash->buckets + stop - stride) % hash->buckets;
289 /* We now search backwards from stop to position. If we find an
290 * item that now hashes to position, we will move it, and then try
291 * to plug the new hole we just opened up, until we finally don't
292 * move anything. */
293 while((prev = move_one_entry(hash, position, stop)) != position) {
294 position = prev;
297 return hash;
300 position += stride;
301 while(position >= hash->buckets) {
302 position -= hash->buckets;
306 return hash;
309 void _alpm_pkghash_free(alpm_pkghash_t *hash)
311 if(hash != NULL) {
312 unsigned int i;
313 for(i = 0; i < hash->buckets; i++) {
314 free(hash->hash_table[i]);
316 free(hash->hash_table);
318 free(hash);
321 alpm_pkg_t *_alpm_pkghash_find(alpm_pkghash_t *hash, const char *name)
323 alpm_list_t *lp;
324 unsigned long name_hash;
325 unsigned int position;
327 if(name == NULL || hash == NULL) {
328 return NULL;
331 name_hash = _alpm_hash_sdbm(name);
333 position = name_hash % hash->buckets;
335 while((lp = hash->hash_table[position]) != NULL) {
336 alpm_pkg_t *info = lp->data;
338 if(info->name_hash == name_hash && strcmp(info->name, name) == 0) {
339 return info;
342 position += stride;
343 while(position >= hash->buckets) {
344 position -= hash->buckets;
348 return NULL;
351 /* vim: set ts=2 sw=2 noet: */