/* $NetBSD: vfs_dirhash.c,v 1.9 2008/12/28 17:11:26 reinoud Exp $ */

/*
 * Copyright (c) 2008 Reinoud Zandijk
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_dirhash.c,v 1.9 2008/12/28 17:11:26 reinoud Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/dirent.h>
#include <sys/hash.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>

#include <sys/dirhash.h>
#define DPRINTF(a) printf a;
/*
 * The locking protocol of the dirhash structures is fairly simple:
 *
 * The global dirhash_queue is protected by the dirhashmutex. This lock is
 * internal only and is FS/mountpoint/vnode independent. On exit of the
 * exported functions this mutex is not held.
 *
 * The dirhash structure is considered part of the vnode/inode/udf_node
 * structure and will thus use the lock that protects that vnode/inode.
 *
 * The dirhash entries are considered part of the dirhash structure and are
 * thus protected by the same lock. (See the usage sketch after
 * dirhash_put() below.)
 */
static struct sysctllog *sysctl_log;
static struct pool dirhash_pool;
static struct pool dirhash_entry_pool;

static kmutex_t dirhashmutex;
static uint32_t maxdirhashsize = DIRHASH_SIZE;
static uint32_t dirhashsize = 0;
static TAILQ_HEAD(_dirhash, dirhash) dirhash_queue;
void
dirhash_init(void)
{
	const struct sysctlnode *rnode, *cnode;
	size_t sz;
	uint32_t max_entries;

	/* initialise dirhash queue */
	TAILQ_INIT(&dirhash_queue);

	/* init dirhash pools */
	sz = sizeof(struct dirhash);
	pool_init(&dirhash_pool, sz, 0, 0, 0,
	    "dirhpl", NULL, IPL_NONE);

	sz = sizeof(struct dirhash_entry);
	pool_init(&dirhash_entry_pool, sz, 0, 0, 0,
	    "dirhepl", NULL, IPL_NONE);

	mutex_init(&dirhashmutex, MUTEX_DEFAULT, IPL_NONE);
	max_entries = maxdirhashsize / sz;
	pool_sethiwat(&dirhash_entry_pool, max_entries);
	/* create sysctl knobs and dials */
	sysctl_createv(&sysctl_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "dirhash", NULL,
	    NULL, 0, NULL, 0,
	    CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
	sysctl_createv(&sysctl_log, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT | CTLFLAG_READONLY,
	    CTLTYPE_INT, "memused",
	    SYSCTL_DESCR("current dirhash memory usage"),
	    NULL, 0, &dirhashsize, 0,
	    CTL_CREATE, CTL_EOL);
	sysctl_createv(&sysctl_log, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_INT, "maxmem",
	    SYSCTL_DESCR("maximum dirhash memory usage"),
	    NULL, 0, &maxdirhashsize, 0,
	    CTL_CREATE, CTL_EOL);
}
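
/*
 * Note (added for clarity): the knobs created above surface as
 * vfs.generic.dirhash.memused (current usage) and
 * vfs.generic.dirhash.maxmem (the limit); both count bytes of
 * dirhash_entry allocations, as maintained in dirhashsize below.
 */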
void
dirhash_finish(void)
{
	pool_destroy(&dirhash_pool);
	pool_destroy(&dirhash_entry_pool);

	mutex_destroy(&dirhashmutex);

	/* sysctl_teardown(&sysctl_log); */
}
/*
 * generic dirhash implementation
 */
void
dirhash_purge_entries(struct dirhash *dirh)
{
	struct dirhash_entry *dirh_e;
	uint32_t hashline;

	if (dirh == NULL)
		return;

	if (dirh->size == 0)
		return;

	for (hashline = 0; hashline < DIRHASH_HASHSIZE; hashline++) {
		while ((dirh_e =
		    LIST_FIRST(&dirh->entries[hashline])) != NULL) {
			LIST_REMOVE(dirh_e, next);
			pool_put(&dirhash_entry_pool, dirh_e);
		}
	}

	while ((dirh_e = LIST_FIRST(&dirh->free_entries)) != NULL) {
		LIST_REMOVE(dirh_e, next);
		pool_put(&dirhash_entry_pool, dirh_e);
	}

	dirh->flags &= ~DIRH_COMPLETE;
	dirh->flags |=  DIRH_PURGED;

	dirhashsize -= dirh->size;
	dirh->size = 0;
}
void
dirhash_purge(struct dirhash **dirhp)
{
	struct dirhash *dirh = *dirhp;

	if (dirh == NULL)
		return;

	/* purge its entries */
	dirhash_purge_entries(dirh);

	/* remove from the LRU queue and recycle */
	mutex_enter(&dirhashmutex);
	TAILQ_REMOVE(&dirhash_queue, dirh, next);
	mutex_exit(&dirhashmutex);

	pool_put(&dirhash_pool, dirh);
	*dirhp = NULL;
}
void
dirhash_get(struct dirhash **dirhp)
{
	struct dirhash *dirh;
	uint32_t hashline;

	/* if no dirhash was given, allocate one */
	dirh = *dirhp;
	if (dirh == NULL) {
		dirh = pool_get(&dirhash_pool, PR_WAITOK);
		memset(dirh, 0, sizeof(struct dirhash));
		for (hashline = 0; hashline < DIRHASH_HASHSIZE; hashline++) {
			LIST_INIT(&dirh->entries[hashline]);
		}
	}

	/* implement LRU on the dirhash queue */
	mutex_enter(&dirhashmutex);
	if (*dirhp) {
		/* remove from queue to be requeued */
		TAILQ_REMOVE(&dirhash_queue, dirh, next);
	}
	dirh->refcnt++;
	TAILQ_INSERT_HEAD(&dirhash_queue, dirh, next);
	mutex_exit(&dirhashmutex);

	*dirhp = dirh;
}
void
dirhash_put(struct dirhash *dirh)
{

	mutex_enter(&dirhashmutex);
	dirh->refcnt--;
	mutex_exit(&dirhashmutex);
}
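
/*
 * Illustrative sketch (not part of the original file): the expected
 * calling pattern for the protocol described at the top. The
 * dirhash_example_node structure is invented for this sketch; in
 * practice the dirhash pointer lives in the FS-specific in-core node
 * (e.g. the udf_node) and that node's vnode/inode lock is held around
 * all of these calls.
 */
#ifdef DIRHASH_EXAMPLE
struct dirhash_example_node {
	struct dirhash *dir_hash;	/* hypothetical member */
};

static void
dirhash_example_enter_one(struct dirhash_example_node *dir_node,
    struct dirent *dirent, uint64_t offset, uint32_t entry_size)
{
	/* node lock held; take a reference and requeue at the LRU head */
	dirhash_get(&dir_node->dir_hash);

	/* cache one directory entry found at `offset' */
	dirhash_enter(dir_node->dir_hash, dirent, offset, entry_size, 0);

	/* drop the reference; cached entries persist for the next user */
	dirhash_put(dir_node->dir_hash);
}
#endif /* DIRHASH_EXAMPLE */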
void
dirhash_enter(struct dirhash *dirh,
	struct dirent *dirent, uint64_t offset, uint32_t entry_size, int new)
{
	struct dirhash *del_dirh, *prev_dirh;
	struct dirhash_entry *dirh_e;
	uint32_t hashvalue, hashline;
	uint32_t entrysize;

	/* make sure we have a dirhash to work on */
	KASSERT(dirh);
	KASSERT(dirh->refcnt > 0);

	/* are we trying to re-enter an entry? */
	if (!new && (dirh->flags & DIRH_COMPLETE))
		return;

	/* calculate our hash */
	hashvalue = hash32_strn(dirent->d_name, dirent->d_namlen,
	    HASH32_STR_INIT);
	hashline  = hashvalue & DIRHASH_HASHMASK;

	/* lookup and insert entry if not there yet */
	LIST_FOREACH(dirh_e, &dirh->entries[hashline], next) {
		/* check for hash collision */
		if (dirh_e->hashvalue != hashvalue)
			continue;
		if (dirh_e->offset != offset)
			continue;
		/* got it already */
		KASSERT(dirh_e->d_namlen == dirent->d_namlen);
		KASSERT(dirh_e->entry_size == entry_size);
		return;
	}

	DPRINTF(("dirhash enter %"PRIu64", %d, %d for `%*.*s`\n",
	    offset, entry_size, dirent->d_namlen,
	    dirent->d_namlen, dirent->d_namlen, dirent->d_name));
	/* check if entry is in free space list */
	LIST_FOREACH(dirh_e, &dirh->free_entries, next) {
		if (dirh_e->offset == offset) {
			DPRINTF(("\tremoving free entry\n"));
			LIST_REMOVE(dirh_e, next);
			pool_put(&dirhash_entry_pool, dirh_e);
			break;
		}
	}
	/* ensure we are not passing the dirhash limit */
	entrysize = sizeof(struct dirhash_entry);
	if (dirhashsize + entrysize > maxdirhashsize) {
		/* we walk the dirhash_queue, so need to lock it */
		mutex_enter(&dirhashmutex);
		del_dirh = TAILQ_LAST(&dirhash_queue, _dirhash);
		while (dirhashsize + entrysize > maxdirhashsize) {
			/* no use trying to delete myself */
			if (del_dirh == dirh)
				break;
			prev_dirh = TAILQ_PREV(del_dirh, _dirhash, next);
			if (del_dirh->refcnt == 0)
				dirhash_purge_entries(del_dirh);
			del_dirh = prev_dirh;
		}
		mutex_exit(&dirhashmutex);
	}
	/* add to the hashline */
	dirh_e = pool_get(&dirhash_entry_pool, PR_WAITOK);
	memset(dirh_e, 0, sizeof(struct dirhash_entry));

	dirh_e->hashvalue  = hashvalue;
	dirh_e->offset     = offset;
	dirh_e->d_namlen   = dirent->d_namlen;
	dirh_e->entry_size = entry_size;

	dirh->size  += sizeof(struct dirhash_entry);
	dirhashsize += sizeof(struct dirhash_entry);
	LIST_INSERT_HEAD(&dirh->entries[hashline], dirh_e, next);
}
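
/*
 * Illustrative sketch (not part of the original file): a file system
 * that has entered every entry of a directory is expected to mark the
 * dirhash DIRH_COMPLETE under the node lock, after which non-`new'
 * calls to dirhash_enter() above become cheap no-ops;
 * dirhash_purge_entries() clears the flag again when the cache is
 * reclaimed.
 */
#ifdef DIRHASH_EXAMPLE
static void
dirhash_example_mark_complete(struct dirhash *dirh)
{
	/* node lock held; we must hold a reference */
	KASSERT(dirh->refcnt > 0);
	dirh->flags |= DIRH_COMPLETE;
}
#endif /* DIRHASH_EXAMPLE */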
void
dirhash_enter_freed(struct dirhash *dirh, uint64_t offset,
	uint32_t entry_size)
{
	struct dirhash_entry *dirh_e;

	/* make sure we have a dirhash to work on */
	KASSERT(dirh);
	KASSERT(dirh->refcnt > 0);

	/* check for double entry of free space */
	LIST_FOREACH(dirh_e, &dirh->free_entries, next) {
		KASSERT(dirh_e->offset != offset);
	}

	DPRINTF(("dirhash enter FREED %"PRIu64", %d\n",
	    offset, entry_size));
	dirh_e = pool_get(&dirhash_entry_pool, PR_WAITOK);
	memset(dirh_e, 0, sizeof(struct dirhash_entry));

	dirh_e->hashvalue  = 0;		/* not relevant */
	dirh_e->offset     = offset;
	dirh_e->d_namlen   = 0;		/* not relevant */
	dirh_e->entry_size = entry_size;

	/* XXX it might be preferable to append them at the tail */
	LIST_INSERT_HEAD(&dirh->free_entries, dirh_e, next);
	dirh->size  += sizeof(struct dirhash_entry);
	dirhashsize += sizeof(struct dirhash_entry);
}
void
dirhash_remove(struct dirhash *dirh, struct dirent *dirent,
	uint64_t offset, uint32_t entry_size)
{
	struct dirhash_entry *dirh_e;
	uint32_t hashvalue, hashline;

	DPRINTF(("dirhash remove %"PRIu64", %d for `%*.*s`\n",
	    offset, entry_size,
	    dirent->d_namlen, dirent->d_namlen, dirent->d_name));

	/* make sure we have a dirhash to work on */
	KASSERT(dirh);
	KASSERT(dirh->refcnt > 0);

	/* calculate our hash */
	hashvalue = hash32_strn(dirent->d_name, dirent->d_namlen,
	    HASH32_STR_INIT);
	hashline  = hashvalue & DIRHASH_HASHMASK;

	/* lookup entry and remove it from its hashline */
	LIST_FOREACH(dirh_e, &dirh->entries[hashline], next) {
		/* check for hash collision */
		if (dirh_e->hashvalue != hashvalue)
			continue;
		if (dirh_e->offset != offset)
			continue;

		/* got it! */
		KASSERT(dirh_e->d_namlen == dirent->d_namlen);
		KASSERT(dirh_e->entry_size == entry_size);
		LIST_REMOVE(dirh_e, next);
		dirh->size  -= sizeof(struct dirhash_entry);
		dirhashsize -= sizeof(struct dirhash_entry);

		/* record the hole it leaves behind as free space */
		dirhash_enter_freed(dirh, offset, entry_size);
		return;
	}

	/* not found! */
	panic("dirhash_remove couldn't find entry in hash table\n");
}
/*
 * BUGALERT: don't use the result longer than needed, and never past the
 * node lock. Call with *result == NULL initially; every call returns
 * nonzero while another candidate entry remains, resuming the scan after
 * the previous result. (An illustrative sketch follows this function.)
 */
int
dirhash_lookup(struct dirhash *dirh, const char *d_name, int d_namlen,
	struct dirhash_entry **result)
{
	struct dirhash_entry *dirh_e;
	uint32_t hashvalue, hashline;

	/* make sure we have a dirhash to work on */
	KASSERT(dirh);
	KASSERT(dirh->refcnt > 0);

	/* start where we were */
	if (*result) {
		dirh_e = *result;

		/* retrieve information to avoid recalculation and advance */
		hashvalue = dirh_e->hashvalue;
		dirh_e = LIST_NEXT(*result, next);
	} else {
		/* calculate our hash and lookup all entries in hashline */
		hashvalue = hash32_strn(d_name, d_namlen, HASH32_STR_INIT);
		hashline  = hashvalue & DIRHASH_HASHMASK;
		dirh_e = LIST_FIRST(&dirh->entries[hashline]);
	}

	for (; dirh_e; dirh_e = LIST_NEXT(dirh_e, next)) {
		/* check for hash collision */
		if (dirh_e->hashvalue != hashvalue)
			continue;
		if (dirh_e->d_namlen != d_namlen)
			continue;
		/* might have an entry in the cache */
		*result = dirh_e;
		return 1;
	}

	*result = NULL;
	return 0;
}
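
/*
 * Illustrative sketch (not part of the original file): the intended
 * resume-style use of dirhash_lookup(). A matching hash and name length
 * do not prove a name match, so each candidate must still be verified
 * against the on-disk entry at dirh_e->offset; the `verify' callback is
 * a hypothetical stand-in for that FS-specific read-and-compare step.
 */
#ifdef DIRHASH_EXAMPLE
static int
dirhash_example_find(struct dirhash *dirh, const char *name, int namlen,
    int (*verify)(uint64_t offset, const char *name, int namlen),
    uint64_t *offsetp)
{
	struct dirhash_entry *dirh_e = NULL;

	/* a nonzero return means another candidate entry to check */
	while (dirhash_lookup(dirh, name, namlen, &dirh_e)) {
		if (verify(dirh_e->offset, name, namlen)) {
			*offsetp = dirh_e->offset;
			return 1;
		}
	}
	return 0;	/* exhausted all candidates */
}
#endif /* DIRHASH_EXAMPLE */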
/*
 * BUGALERT: don't use the result longer than needed, and never past the
 * node lock. Call with *result == NULL initially; every call returns
 * nonzero while another candidate remains, resuming the scan after the
 * previous result. (An illustrative sketch follows this function.)
 */
int
dirhash_lookup_freed(struct dirhash *dirh, uint32_t min_entrysize,
	struct dirhash_entry **result)
{
	struct dirhash_entry *dirh_e;

	/* make sure we have a dirhash to work on */
	KASSERT(dirh);
	KASSERT(dirh->refcnt > 0);

	/* start where we were */
	if (*result) {
		dirh_e = LIST_NEXT(*result, next);
	} else {
		/* lookup all entries that match */
		dirh_e = LIST_FIRST(&dirh->free_entries);
	}

	for (; dirh_e; dirh_e = LIST_NEXT(dirh_e, next)) {
		/* check for minimum size */
		if (dirh_e->entry_size < min_entrysize)
			continue;
		/* might be a candidate */
		*result = dirh_e;
		return 1;
	}

	*result = NULL;
	return 0;
}
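
/*
 * Illustrative sketch (not part of the original file): using
 * dirhash_lookup_freed() to find a recorded hole of at least `wanted'
 * bytes for a new directory entry. The hypothetical `verify' callback
 * stands in for the FS re-reading the slot to confirm it is still free;
 * when no cached hole fits, the caller appends at end-of-directory.
 */
#ifdef DIRHASH_EXAMPLE
static uint64_t
dirhash_example_find_slot(struct dirhash *dirh, uint32_t wanted,
    int (*verify)(uint64_t offset, uint32_t size), uint64_t eod_offset)
{
	struct dirhash_entry *dirh_e = NULL;

	while (dirhash_lookup_freed(dirh, wanted, &dirh_e)) {
		/* candidate hole is at least `wanted' bytes */
		if (verify(dirh_e->offset, dirh_e->entry_size))
			return dirh_e->offset;
	}
	return eod_offset;	/* no reusable hole; append at the end */
}
#endif /* DIRHASH_EXAMPLE */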