4 * Copyright (c) 2011-2012 Pacman Development Team <pacman-dev@archlinux.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
25 /* List of primes for possible sizes of hash tables.
27 * The maximum table size is the last prime under 1,000,000. That is
28 * more than an order of magnitude greater than the number of packages
29 * in any Linux distribution, and well under UINT_MAX.
31 static const unsigned int prime_list
[] =
33 11u, 13u, 17u, 19u, 23u, 29u, 31u, 37u, 41u, 43u, 47u,
34 53u, 59u, 61u, 67u, 71u, 73u, 79u, 83u, 89u, 97u, 103u,
35 109u, 113u, 127u, 137u, 139u, 149u, 157u, 167u, 179u, 193u,
36 199u, 211u, 227u, 241u, 257u, 277u, 293u, 313u, 337u, 359u,
37 383u, 409u, 439u, 467u, 503u, 541u, 577u, 619u, 661u, 709u,
38 761u, 823u, 887u, 953u, 1031u, 1109u, 1193u, 1289u, 1381u,
39 1493u, 1613u, 1741u, 1879u, 2029u, 2179u, 2357u, 2549u,
40 2753u, 2971u, 3209u, 3469u, 3739u, 4027u, 4349u, 4703u,
41 5087u, 5503u, 5953u, 6427u, 6949u, 7517u, 8123u, 8783u,
42 9497u, 10273u, 11113u, 12011u, 12983u, 14033u, 15173u,
43 16411u, 17749u, 19183u, 20753u, 22447u, 24281u, 26267u,
44 28411u, 30727u, 33223u, 35933u, 38873u, 42043u, 45481u,
45 49201u, 53201u, 57557u, 62233u, 67307u, 72817u, 78779u,
46 85229u, 92203u, 99733u, 107897u, 116731u, 126271u, 136607u,
47 147793u, 159871u, 172933u, 187091u, 202409u, 218971u, 236897u,
48 256279u, 277261u, 299951u, 324503u, 351061u, 379787u, 410857u,
49 444487u, 480881u, 520241u, 562841u, 608903u, 658753u, 712697u,
50 771049u, 834181u, 902483u, 976369u
53 /* How far forward do we look when linear probing for a spot? */
54 static const unsigned int stride
= 1;
55 /* What is the maximum load percentage of our hash table? */
56 static const double max_hash_load
= 0.68;
57 /* Initial load percentage given a certain size */
58 static const double initial_hash_load
= 0.58;
60 /* Allocate a hash table with space for at least "size" elements */
61 alpm_pkghash_t
*_alpm_pkghash_create(unsigned int size
)
63 alpm_pkghash_t
*hash
= NULL
;
64 unsigned int i
, loopsize
;
66 CALLOC(hash
, 1, sizeof(alpm_pkghash_t
), return NULL
);
67 size
= size
/ initial_hash_load
+ 1;
69 loopsize
= sizeof(prime_list
) / sizeof(*prime_list
);
70 for(i
= 0; i
< loopsize
; i
++) {
71 if(prime_list
[i
] > size
) {
72 hash
->buckets
= prime_list
[i
];
73 hash
->limit
= hash
->buckets
* max_hash_load
;
78 if(hash
->buckets
< size
) {
84 CALLOC(hash
->hash_table
, hash
->buckets
, sizeof(alpm_list_t
*), \
85 free(hash
); return NULL
);
90 static unsigned int get_hash_position(unsigned long name_hash
,
93 unsigned int position
;
95 position
= name_hash
% hash
->buckets
;
97 /* collision resolution using open addressing with linear probing */
98 while(hash
->hash_table
[position
] != NULL
) {
100 while(position
>= hash
->buckets
) {
101 position
-= hash
->buckets
;
108 /* Expand the hash table size to the next increment and rebin the entries */
109 static alpm_pkghash_t
*rehash(alpm_pkghash_t
*oldhash
)
111 alpm_pkghash_t
*newhash
;
112 unsigned int newsize
, i
;
114 /* Hash tables will need resized in two cases:
115 * - adding packages to the local database
116 * - poor estimation of the number of packages in sync database
118 * For small hash tables sizes (<500) the increase in size is by a
119 * minimum of a factor of 2 for optimal rehash efficiency. For
120 * larger database sizes, this increase is reduced to avoid excess
121 * memory allocation as both scenarios requiring a rehash should not
122 * require a table size increase that large. */
123 if(oldhash
->buckets
< 500) {
124 newsize
= oldhash
->buckets
* 2;
125 } else if(oldhash
->buckets
< 2000) {
126 newsize
= oldhash
->buckets
* 3 / 2;
127 } else if(oldhash
->buckets
< 5000) {
128 newsize
= oldhash
->buckets
* 4 / 3;
130 newsize
= oldhash
->buckets
+ 1;
133 newhash
= _alpm_pkghash_create(newsize
);
134 if(newhash
== NULL
) {
135 /* creation of newhash failed, stick with old one... */
139 newhash
->list
= oldhash
->list
;
140 oldhash
->list
= NULL
;
142 for(i
= 0; i
< oldhash
->buckets
; i
++) {
143 if(oldhash
->hash_table
[i
] != NULL
) {
144 alpm_pkg_t
*package
= oldhash
->hash_table
[i
]->data
;
145 unsigned int position
= get_hash_position(package
->name_hash
, newhash
);
147 newhash
->hash_table
[position
] = oldhash
->hash_table
[i
];
148 oldhash
->hash_table
[i
] = NULL
;
152 newhash
->entries
= oldhash
->entries
;
154 _alpm_pkghash_free(oldhash
);
159 static alpm_pkghash_t
*pkghash_add_pkg(alpm_pkghash_t
*hash
, alpm_pkg_t
*pkg
,
163 unsigned int position
;
165 if(pkg
== NULL
|| hash
== NULL
) {
169 if(hash
->entries
>= hash
->limit
) {
173 position
= get_hash_position(pkg
->name_hash
, hash
);
175 ptr
= malloc(sizeof(alpm_list_t
));
184 hash
->hash_table
[position
] = ptr
;
186 hash
->list
= alpm_list_join(hash
->list
, ptr
);
188 hash
->list
= alpm_list_mmerge(hash
->list
, ptr
, _alpm_pkg_cmp
);
195 alpm_pkghash_t
*_alpm_pkghash_add(alpm_pkghash_t
*hash
, alpm_pkg_t
*pkg
)
197 return pkghash_add_pkg(hash
, pkg
, 0);
200 alpm_pkghash_t
*_alpm_pkghash_add_sorted(alpm_pkghash_t
*hash
, alpm_pkg_t
*pkg
)
202 return pkghash_add_pkg(hash
, pkg
, 1);
205 static unsigned int move_one_entry(alpm_pkghash_t
*hash
,
206 unsigned int start
, unsigned int end
)
208 /* Iterate backwards from 'end' to 'start', seeing if any of the items
209 * would hash to 'start'. If we find one, we move it there and break. If
210 * we get all the way back to position and find none that hash to it, we
211 * also end iteration. Iterating backwards helps prevent needless shuffles;
212 * we will never need to move more than one item per function call. The
213 * return value is our current iteration location; if this is equal to
214 * 'start' we can stop this madness. */
215 while(end
!= start
) {
216 alpm_list_t
*i
= hash
->hash_table
[end
];
217 alpm_pkg_t
*info
= i
->data
;
218 unsigned int new_position
= get_hash_position(info
->name_hash
, hash
);
220 if(new_position
== start
) {
221 hash
->hash_table
[start
] = i
;
222 hash
->hash_table
[end
] = NULL
;
226 /* the odd math ensures we are always positive, e.g.
227 * e.g. (0 - 1) % 47 == -1
228 * e.g. (47 + 0 - 1) % 47 == 46 */
229 end
= (hash
->buckets
+ end
- stride
) % hash
->buckets
;
235 * @brief Remove a package from a pkghash.
237 * @param hash the hash to remove the package from
238 * @param pkg the package we are removing
239 * @param data output parameter containing the removed item
241 * @return the resultant hash
243 alpm_pkghash_t
*_alpm_pkghash_remove(alpm_pkghash_t
*hash
, alpm_pkg_t
*pkg
,
247 unsigned int position
;
253 if(pkg
== NULL
|| hash
== NULL
) {
257 position
= pkg
->name_hash
% hash
->buckets
;
258 while((i
= hash
->hash_table
[position
]) != NULL
) {
259 alpm_pkg_t
*info
= i
->data
;
261 if(info
->name_hash
== pkg
->name_hash
&&
262 strcmp(info
->name
, pkg
->name
) == 0) {
263 unsigned int stop
, prev
;
265 /* remove from list and hash */
266 hash
->list
= alpm_list_remove_item(hash
->list
, i
);
270 hash
->hash_table
[position
] = NULL
;
274 /* Potentially move entries following removed entry to keep open
275 * addressing collision resolution working. We start by finding the
276 * next null bucket to know how far we have to look. */
277 stop
= position
+ stride
;
278 while(stop
>= hash
->buckets
) {
279 stop
-= hash
->buckets
;
281 while(hash
->hash_table
[stop
] != NULL
&& stop
!= position
) {
283 while(stop
>= hash
->buckets
) {
284 stop
-= hash
->buckets
;
287 stop
= (hash
->buckets
+ stop
- stride
) % hash
->buckets
;
289 /* We now search backwards from stop to position. If we find an
290 * item that now hashes to position, we will move it, and then try
291 * to plug the new hole we just opened up, until we finally don't
293 while((prev
= move_one_entry(hash
, position
, stop
)) != position
) {
301 while(position
>= hash
->buckets
) {
302 position
-= hash
->buckets
;
309 void _alpm_pkghash_free(alpm_pkghash_t
*hash
)
313 for(i
= 0; i
< hash
->buckets
; i
++) {
314 free(hash
->hash_table
[i
]);
316 free(hash
->hash_table
);
321 alpm_pkg_t
*_alpm_pkghash_find(alpm_pkghash_t
*hash
, const char *name
)
324 unsigned long name_hash
;
325 unsigned int position
;
327 if(name
== NULL
|| hash
== NULL
) {
331 name_hash
= _alpm_hash_sdbm(name
);
333 position
= name_hash
% hash
->buckets
;
335 while((lp
= hash
->hash_table
[position
]) != NULL
) {
336 alpm_pkg_t
*info
= lp
->data
;
338 if(info
->name_hash
== name_hash
&& strcmp(info
->name
, name
) == 0) {
343 while(position
>= hash
->buckets
) {
344 position
-= hash
->buckets
;
351 /* vim: set ts=2 sw=2 noet: */