/*	$NetBSD: subr_vmem.c,v 1.56 2009/02/18 13:33:46 yamt Exp $	*/

/*-
 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 *
 * todo:
 * -	decide how to import segments for vmem_xalloc.
 * -	don't rely on malloc(9).
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.56 2009/02/18 13:33:46 yamt Exp $");

#endif /* defined(_KERNEL) */

#include <sys/param.h>
#include <sys/queue.h>

#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/kernel.h>	/* hz */
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/workqueue.h>
#else /* defined(_KERNEL) */
#include "../sys/vmem.h"
#endif /* defined(_KERNEL) */
#if defined(_KERNEL)
#define	LOCK_DECL(name)		\
    kmutex_t name; char lockpad[COHERENCY_UNIT - sizeof(kmutex_t)]
#else /* defined(_KERNEL) */

#define	KASSERT(a)		assert(a)
#define	LOCK_DECL(name)		/* nothing */
#define	mutex_init(a, b, c)	/* nothing */
#define	mutex_destroy(a)	/* nothing */
#define	mutex_enter(a)		/* nothing */
#define	mutex_tryenter(a)	true
#define	mutex_exit(a)		/* nothing */
#define	mutex_owned(a)		true
#define	ASSERT_SLEEPABLE()	/* nothing */
#define	panic(...)		do { printf(__VA_ARGS__); abort(); } while (/*CONSTCOND*/0)
#endif /* defined(_KERNEL) */
#if defined(VMEM_SANITY)
static void vmem_check(vmem_t *);
#else /* defined(VMEM_SANITY) */
#define	vmem_check(vm)	/* nothing */
#endif /* defined(VMEM_SANITY) */
#define	VMEM_MAXORDER		(sizeof(vmem_size_t) * CHAR_BIT)

#define	VMEM_HASHSIZE_MIN	1	/* XXX */
#define	VMEM_HASHSIZE_MAX	65536	/* XXX */
#define	VMEM_HASHSIZE_INIT	128

#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)
CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);
#if defined(QCACHE)
#define	VMEM_QCACHE_IDX_MAX	32

#define	QC_NAME_MAX	16

struct qcache {
	pool_cache_t qc_cache;
	vmem_t *qc_vmem;
	char qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool->pr_qcache))
#endif /* defined(QCACHE) */
struct vmem {
	LOCK_DECL(vm_lock);
	vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
	    vm_flag_t);
	void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
	vmem_t *vm_source;
	struct vmem_seglist vm_seglist;
	struct vmem_freelist vm_freelist[VMEM_MAXORDER];
	size_t vm_hashsize;
	size_t vm_nbusytag;
	struct vmem_hashlist *vm_hashlist;
	size_t vm_quantum_mask;
	int vm_quantum_shift;
	const char *vm_name;
	LIST_ENTRY(vmem) vm_alllist;

#if defined(QCACHE)
	/* quantum cache */
	size_t vm_qcache_max;
	struct pool_allocator vm_qcache_allocator;
	qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX];
	qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX];
#endif /* defined(QCACHE) */
};
#define	VMEM_LOCK(vm)		mutex_enter(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mutex_tryenter(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mutex_exit(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, ipl)	mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
#define	VMEM_LOCK_DESTROY(vm)	mutex_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	KASSERT(mutex_owned(&vm->vm_lock))
struct vmem_btag {
	CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t bt_start;
	vmem_size_t bt_size;
	int bt_type;
};
#define	BT_TYPE_SPAN		1
#define	BT_TYPE_SPAN_STATIC	2
#define	BT_TYPE_FREE		3
#define	BT_TYPE_BUSY		4
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size)

typedef struct vmem_btag bt_t;
#define	VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))
#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)
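
/*
 * Worked example (illustrative, not from the original source; it assumes
 * two's-complement arithmetic and power-of-2 "align"/"boundary" values,
 * which the callers below assert):
 *
 *	VMEM_ALIGNUP(5, 4) == 8		rounds 5 up to the next multiple of 4
 *	VMEM_CROSS_P(7, 8, 8) != 0	[7, 8] crosses an 8-aligned boundary
 *	VMEM_CROSS_P(8, 15, 8) == 0	[8, 15] stays within one 8-byte chunk
 */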

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
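
/*
 * calc_order: return the order of "size", i.e. the largest "order" such
 * that ORDER2SIZE(order) <= size (in other words, floor(log2(size))).
 * The assertions below state the same property.
 */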
calc_order(vmem_size_t size)

	while (ORDER2SIZE(i) <= target) {

	KASSERT(ORDER2SIZE(i) <= size);
	KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));
#if defined(_KERNEL)
static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
#endif /* defined(_KERNEL) */

xmalloc(size_t sz, vm_flag_t flags)

#if defined(_KERNEL)
	return malloc(sz, M_VMEM,
	    M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
#else /* defined(_KERNEL) */
	return malloc(sz);
#endif /* defined(_KERNEL) */

xfree(void *p)

#if defined(_KERNEL)
	return free(p, M_VMEM);
#else /* defined(_KERNEL) */
	return free(p);
#endif /* defined(_KERNEL) */
/* ---- boundary tag */

#if defined(_KERNEL)
static struct pool_cache bt_cache;
#endif /* defined(_KERNEL) */

bt_alloc(vmem_t *vm, vm_flag_t flags)

#if defined(_KERNEL)
	bt = pool_cache_get(&bt_cache,
	    (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
#else /* defined(_KERNEL) */
	bt = malloc(sizeof *bt);
#endif /* defined(_KERNEL) */

bt_free(vmem_t *vm, bt_t *bt)

#if defined(_KERNEL)
	pool_cache_put(&bt_cache, bt);
#else /* defined(_KERNEL) */
	free(bt);
#endif /* defined(_KERNEL) */
/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */
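
/*
 * Example (illustrative, assuming a quantum of one): a free segment of
 * 10 quanta sits on freelist[3] ([8, 15]).  bt_freehead_tofree() returns
 * exactly that list; bt_freehead_toalloc() with VM_INSTANTFIT rounds a
 * 10-quanta request up to freelist[4], so that any segment found there
 * is guaranteed to be large enough and can be taken without searching.
 */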
static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)

	const vmem_size_t qsize = size >> vm->vm_quantum_shift;

	KASSERT((size & vm->vm_quantum_mask) == 0);

	idx = calc_order(qsize);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)

	const vmem_size_t qsize = size >> vm->vm_quantum_shift;

	KASSERT((size & vm->vm_quantum_mask) == 0);

	idx = calc_order(qsize);
	if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
/* ---- boundary tag hash */
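
/*
 * Busy (allocated) boundary tags are kept in a hash table keyed by their
 * start address, so that a free of an address can find the matching tag
 * in roughly constant time instead of walking the whole segment list.
 */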
static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)

	struct vmem_hashlist *list;

	hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)

	struct vmem_hashlist *list;

	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
bt_rembusy(vmem_t *vm, bt_t *bt)

	KASSERT(vm->vm_nbusytag > 0);

	LIST_REMOVE(bt, bt_hashlist);
bt_insbusy(vmem_t *vm, bt_t *bt)

	struct vmem_hashlist *list;

	KASSERT(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
/* ---- boundary tag list */
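
/*
 * Every boundary tag of an arena, span or segment, busy or free, also
 * lives on vm_seglist in address order; free segments are additionally
 * linked onto the per-order vm_freelist that matches their size.
 */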
bt_remseg(vmem_t *vm, bt_t *bt)

	CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);

bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)

	CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);

bt_insseg_tail(vmem_t *vm, bt_t *bt)

	CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);

bt_remfree(vmem_t *vm, bt_t *bt)

	KASSERT(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);

bt_insfree(vmem_t *vm, bt_t *bt)

	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
/* ---- vmem internal functions */

#if defined(_KERNEL)
static kmutex_t vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
#endif /* defined(_KERNEL) */
static inline vm_flag_t
prf_to_vmf(int prflags)

	KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
	if ((prflags & PR_WAITOK) != 0) {
		vmflags = VM_SLEEP;
	} else {
		vmflags = VM_NOSLEEP;

vmf_to_prf(vm_flag_t vmflags)

	if ((vmflags & VM_SLEEP) != 0) {
qc_poolpage_size(size_t qcache_max)

	for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {

	return ORDER2SIZE(i);
qc_poolpage_alloc(struct pool *pool, int prflags)

	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
	    prf_to_vmf(prflags) | VM_INSTANTFIT);
qc_poolpage_free(struct pool *pool, void *addr)

	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
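
/*
 * The quantum cache: allocations of up to vm_qcache_max bytes are served
 * from per-size pool_caches (one per multiple of the quantum), which in
 * turn get their pages from the arena via qc_poolpage_alloc() above.
 * Neighbouring sizes that would yield the same items-per-page share a
 * single pool_cache.
 */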
qc_init(vmem_t *vm, size_t qcache_max, int ipl)

	struct pool_allocator *pa;

	KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
	if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
		qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;

	vm->vm_qcache_max = qcache_max;
	pa = &vm->vm_qcache_allocator;
	memset(pa, 0, sizeof(*pa));
	pa->pa_alloc = qc_poolpage_alloc;
	pa->pa_free = qc_poolpage_free;
	pa->pa_pagesz = qc_poolpage_size(qcache_max);

	qcache_idx_max = qcache_max >> vm->vm_quantum_shift;

	for (i = qcache_idx_max; i > 0; i--) {
		qcache_t *qc = &vm->vm_qcache_store[i - 1];
		size_t size = i << vm->vm_quantum_shift;

		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);
		qc->qc_cache = pool_cache_init(size,
		    ORDER2SIZE(vm->vm_quantum_shift), 0,
		    PR_NOALIGN | PR_NOTOUCH /* XXX */,
		    qc->qc_name, pa, ipl, NULL, NULL, NULL);
		KASSERT(qc->qc_cache != NULL);	/* XXX */
		if (prevqc != NULL &&
		    qc->qc_cache->pc_pool.pr_itemsperpage ==
		    prevqc->qc_cache->pc_pool.pr_itemsperpage) {
			pool_cache_destroy(qc->qc_cache);
			vm->vm_qcache[i - 1] = prevqc;

		qc->qc_cache->pc_pool.pr_qcache = qc;
		vm->vm_qcache[i - 1] = qc;
qc_destroy(vmem_t *vm)

	const qcache_t *prevqc;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;

	for (i = 0; i < qcache_idx_max; i++) {
		qcache_t *qc = vm->vm_qcache[i];

		pool_cache_destroy(qc->qc_cache);
qc_reap(vmem_t *vm)

	const qcache_t *prevqc;

	bool didsomething = false;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;

	for (i = 0; i < qcache_idx_max; i++) {
		qcache_t *qc = vm->vm_qcache[i];

		if (pool_cache_reclaim(qc->qc_cache) != 0) {

#endif /* defined(QCACHE) */
	mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_NONE);
	pool_cache_bootstrap(&bt_cache, sizeof(bt_t), 0, 0, 0, "vmembt",
	    NULL, IPL_VM, NULL, NULL, NULL);
#endif /* defined(_KERNEL) */
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
    int spanbttype)

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT(spanbttype == BT_TYPE_SPAN || spanbttype == BT_TYPE_SPAN_STATIC);

	btspan = bt_alloc(vm, flags);
	if (btspan == NULL) {
		return VMEM_ADDR_NULL;

	btfree = bt_alloc(vm, flags);
	if (btfree == NULL) {

		return VMEM_ADDR_NULL;

	btspan->bt_type = spanbttype;
	btspan->bt_start = addr;
	btspan->bt_size = size;

	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;

	bt_insseg_tail(vm, btspan);
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);
vmem_destroy1(vmem_t *vm)

#endif /* defined(QCACHE) */
	if (vm->vm_hashlist != NULL) {

		for (i = 0; i < vm->vm_hashsize; i++) {

			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);

		xfree(vm->vm_hashlist);

	VMEM_LOCK_DESTROY(vm);
vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)

	if (vm->vm_allocfn == NULL) {

	addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
	if (addr == VMEM_ADDR_NULL) {

	if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
		(*vm->vm_freefn)(vm->vm_source, addr, size);
vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)

	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;

	KASSERT(newhashsize > 0);

	newhashlist =
	    xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
	if (newhashlist == NULL) {

	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);

	if (!VMEM_TRYLOCK(vm)) {

	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {

	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt); /* XXX */
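
/*
 * The constraint arguments taken by vmem_fit() and vmem_xalloc():
 * "align"/"phase" ask for an address congruent to phase modulo align,
 * "nocross" asks that the allocation not straddle a nocross-aligned
 * boundary, and "minaddr"/"maxaddr" bound the acceptable address range.
 */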
/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 */

vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
	KASSERT(bt->bt_size >= size);

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {

	if (end > maxaddr - 1) {

		return VMEM_ADDR_NULL;

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start) {

	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		KASSERT(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;

	if (start < end && end - start >= size) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(start + size <= BT_END(bt));

	return VMEM_ADDR_NULL;
/*
 * vmem_create: create an arena.
 *
 * => must not be called from interrupt context.
 */
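
/*
 * Example usage (a sketch, not part of the original file): create a
 * page-granular arena over [0x1000, 0x1000 + 0x10000) with no backing
 * source arena, then allocate and free a three-page chunk from it.
 *
 *	vmem_t *arena;
 *	vmem_addr_t va;
 *
 *	arena = vmem_create("example", 0x1000, 0x10000, PAGE_SIZE,
 *	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
 *	if (arena == NULL)
 *		panic("vmem_create");
 *	va = vmem_alloc(arena, 3 * PAGE_SIZE, VM_INSTANTFIT | VM_SLEEP);
 *	if (va != VMEM_ADDR_NULL)
 *		vmem_free(arena, va, 3 * PAGE_SIZE);
 *	vmem_destroy(arena);
 */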
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum,
    vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
    void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags,
    int ipl)

#if defined(_KERNEL)
	static ONCE_DECL(control);
#endif /* defined(_KERNEL) */

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);

#if defined(_KERNEL)
	if (RUN_ONCE(&control, vmem_init)) {

#endif /* defined(_KERNEL) */
	vm = xmalloc(sizeof(*vm), flags);

	VMEM_LOCK_INIT(vm, ipl);
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = calc_order(quantum);
	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_allocfn = allocfn;
	vm->vm_freefn = freefn;
	vm->vm_source = source;

#if defined(QCACHE)
	qc_init(vm, qcache_max, ipl);
#endif /* defined(QCACHE) */

	CIRCLEQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);

	vm->vm_hashlist = NULL;
	if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {

	if (vmem_add(vm, base, size, flags) == 0) {

#if defined(_KERNEL)
	mutex_enter(&vmem_list_lock);
	LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */
vmem_destroy(vmem_t *vm)

#if defined(_KERNEL)
	mutex_enter(&vmem_list_lock);
	LIST_REMOVE(vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */
vmem_roundup_size(vmem_t *vm, vmem_size_t size)

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
/*
 * vmem_alloc:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */
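
/*
 * Requests no larger than vm_qcache_max are served from the per-size
 * quantum cache (indexed by the request size in quanta); anything larger
 * falls through to vmem_xalloc() with no extra constraints.
 */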
vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags)

	const vm_flag_t strat __unused = flags & VM_FITMASK;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);

	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		return (vmem_addr_t)pool_cache_get(qc->qc_cache,
		    vmf_to_prf(flags));
#endif /* defined(QCACHE) */

	return vmem_xalloc(vm, size, 0, 0, 0, 0, 0, flags);
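
/*
 * vmem_xalloc implements the two allocation strategies: VM_INSTANTFIT
 * starts at the smallest freelist guaranteed to hold a big-enough segment
 * and takes the first entry it finds, while VM_BESTFIT walks each list
 * and probes every segment that is at least "size" long.  If nothing
 * fits and the request is unconstrained, more space is imported from the
 * source arena and the search is retried.
 */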
vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
    vm_flag_t flags)

	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;

	const vmem_size_t size = vmem_roundup_size(vm, size0);
	vm_flag_t strat = flags & VM_FITMASK;

	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {

	KASSERT((align & vm->vm_quantum_mask) == 0);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((phase & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & (nocross - 1)) == 0);
	KASSERT((align == 0 && phase == 0) || phase < align);
	KASSERT(nocross == 0 || nocross >= size);
	KASSERT(maxaddr == 0 || minaddr < maxaddr);
	KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

		align = vm->vm_quantum_mask + 1;

	btnew = bt_alloc(vm, flags);
		return VMEM_ADDR_NULL;

	btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
	if (btnew2 == NULL) {
		return VMEM_ADDR_NULL;

	first = bt_freehead_toalloc(vm, size, strat);
	end = &vm->vm_freelist[VMEM_MAXORDER];
	if (strat == VM_INSTANTFIT) {
		for (list = first; list < end; list++) {
			bt = LIST_FIRST(list);
				start = vmem_fit(bt, size, align, phase,
				    nocross, minaddr, maxaddr);
				if (start != VMEM_ADDR_NULL) {
	} else { /* VM_BESTFIT */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					start = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr);
					if (start != VMEM_ADDR_NULL) {

	if (strat == VM_INSTANTFIT) {

	if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
	    nocross != 0 || minaddr != 0 || maxaddr != 0) {

		/*
		 * XXX should try to import a region large enough to
		 * satisfy restrictions?
		 */

	if (vmem_import(vm, size, flags) == 0) {

	return VMEM_ADDR_NULL;

	KASSERT(bt->bt_type == BT_TYPE_FREE);
	KASSERT(bt->bt_size >= size);

	if (bt->bt_start != start) {
		btnew2->bt_type = BT_TYPE_FREE;
		btnew2->bt_start = bt->bt_start;
		btnew2->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btnew2->bt_size;
		bt_insfree(vm, btnew2);
		bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));

	KASSERT(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
		bt_insbusy(vm, btnew);

		bt->bt_type = BT_TYPE_BUSY;

	if (btnew2 != NULL) {
		bt_free(vm, btnew2);
	KASSERT(btnew->bt_size >= size);
	btnew->bt_type = BT_TYPE_BUSY;

	return btnew->bt_start;
/*
 * vmem_free:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
	KASSERT(addr != VMEM_ADDR_NULL);

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		return pool_cache_put(qc->qc_cache, (void *)addr);
#endif /* defined(QCACHE) */

	vmem_xfree(vm, addr, size);
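
/*
 * vmem_xfree coalesces the freed segment with free neighbours on the
 * address-ordered segment list, and if the result covers an entire
 * imported span (BT_TYPE_SPAN), hands that span back to the source
 * arena through vm_freefn.
 */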
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)

	KASSERT(addr != VMEM_ADDR_NULL);

	bt = bt_lookupbusy(vm, addr);
	KASSERT(bt != NULL);
	KASSERT(bt->bt_start == addr);
	KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	KASSERT(bt->bt_type == BT_TYPE_BUSY);

	bt->bt_type = BT_TYPE_FREE;

	t = CIRCLEQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(bt) == t->bt_start);

		bt->bt_size += t->bt_size;

	t = CIRCLEQ_PREV(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(t) == bt->bt_start);

		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;

	t = CIRCLEQ_PREV(bt, bt_seglist);
	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		KASSERT(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;

		(*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
/*
 * vmem_add:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)

	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
/*
 * vmem_reap: reap unused resources.
 *
 * => return true if we successfully reaped something.
 */

vmem_reap(vmem_t *vm)

	bool didsomething = false;

#if defined(QCACHE)
	didsomething = qc_reap(vm);
#endif /* defined(QCACHE) */
	return didsomething;
#if defined(_KERNEL)
static struct callout vmem_rehash_ch;
static int vmem_rehash_interval;
static struct workqueue *vmem_rehash_wq;
static struct work vmem_rehash_wk;
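
/*
 * A periodic callout kicks a workqueue job that walks every arena and
 * resizes its busy-tag hash table whenever the number of busy tags has
 * drifted to more than twice, or less than half, the current table size.
 */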
vmem_rehash_all(struct work *wk, void *dummy)

	KASSERT(wk == &vmem_rehash_wk);
	mutex_enter(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {

		if (!VMEM_TRYLOCK(vm)) {

		desired = vm->vm_nbusytag;
		current = vm->vm_hashsize;

		if (desired > VMEM_HASHSIZE_MAX) {
			desired = VMEM_HASHSIZE_MAX;
		} else if (desired < VMEM_HASHSIZE_MIN) {
			desired = VMEM_HASHSIZE_MIN;

		if (desired > current * 2 || desired * 2 < current) {
			vmem_rehash(vm, desired, VM_NOSLEEP);

	mutex_exit(&vmem_list_lock);

	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
vmem_rehash_all_kick(void *dummy)

	workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);

vmem_rehash_start(void)

	error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
	    vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE);
		panic("%s: workqueue_create %d\n", __func__, error);
	callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE);
	callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);

	vmem_rehash_interval = hz * 10;
	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
#endif /* defined(_KERNEL) */
#if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY)

static void bt_dump(const bt_t *, void (*)(const char *, ...));

bt_type_string(int type)

	static const char * const table[] = {
		[BT_TYPE_BUSY] = "busy",
		[BT_TYPE_FREE] = "free",
		[BT_TYPE_SPAN] = "span",
		[BT_TYPE_SPAN_STATIC] = "static span",

	if (type >= __arraycount(table)) {

bt_dump(const bt_t *bt, void (*pr)(const char *, ...))

	(*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n",
	    bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
	    bt->bt_type, bt_type_string(bt->bt_type));
vmem_dump(const vmem_t *vm, void (*pr)(const char *, ...))

	(*pr)("vmem %p '%s'\n", vm, vm->vm_name);
	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {

		(*pr)("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {

#endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */
vmem_whatis_lookup(vmem_t *vm, uintptr_t addr)

	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (BT_ISSPAN_P(bt)) {

		if (bt->bt_start <= addr && addr < BT_END(bt)) {

vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...))

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {

		bt = vmem_whatis_lookup(vm, addr);

		(*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
		    (void *)addr, (void *)bt->bt_start,
		    (size_t)(addr - bt->bt_start), vm->vm_name,
		    (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");

vmem_printall(const char *modif, void (*pr)(const char *, ...))

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {

vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...))

	const vmem_t *vm = (const void *)addr;

#endif /* defined(DDB) */
#if !defined(_KERNEL)
#endif /* !defined(_KERNEL) */
#if defined(VMEM_SANITY)

vmem_check_sanity(vmem_t *vm)

	const bt_t *bt, *bt2;

	KASSERT(vm != NULL);

	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (bt->bt_start >= BT_END(bt)) {
			printf("corrupted tag\n");
			bt_dump(bt, (void *)printf);

	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		CIRCLEQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {

			if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {

			if (bt->bt_start < BT_END(bt2) &&
			    bt2->bt_start < BT_END(bt)) {
				printf("overlapping tags\n");
				bt_dump(bt, (void *)printf);
				bt_dump(bt2, (void *)printf);

vmem_check(vmem_t *vm)

	if (!vmem_check_sanity(vm)) {
		panic("insanity vmem %p", vm);

#endif /* defined(VMEM_SANITY) */
#if defined(UNITTEST)

	vmem_size_t total = 0;
#if 1
	vm_flag_t strat = VM_INSTANTFIT;
#else
	vm_flag_t strat = VM_BESTFIT;
#endif

	vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP, 0/*XXX*/);
	if (vm == NULL) {
		printf("vmem_create\n");

	vmem_dump(vm, (void *)printf);

	p = vmem_add(vm, 100, 200, VM_SLEEP);
	p = vmem_add(vm, 2000, 1, VM_SLEEP);
	p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
	p = vmem_add(vm, 10000, 10000, VM_SLEEP);
	p = vmem_add(vm, 500, 1000, VM_SLEEP);
	vmem_dump(vm, (void *)printf);

		int t = rand() % 100;

			vmem_size_t sz = rand() % 500 + 1;

			vmem_size_t align, phase, nocross;
			vmem_addr_t minaddr, maxaddr;

				align = 1 << (rand() % 15);
				phase = rand() % 65536;
				nocross = 1 << (rand() % 15);
				if (align <= phase) {

				if (VMEM_CROSS_P(phase, phase + sz - 1,

				minaddr = rand() % 50000;
				maxaddr = rand() % 70000;
				if (minaddr > maxaddr) {

				printf("=== xalloc %" PRIu64
				    " align=%" PRIu64 ", phase=%" PRIu64
				    ", nocross=%" PRIu64 ", min=%" PRIu64
				    ", max=%" PRIu64 "\n",

				p = vmem_xalloc(vm, sz, align, phase, nocross,
				    minaddr, maxaddr, strat|VM_SLEEP);

			printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
			p = vmem_alloc(vm, sz, strat|VM_SLEEP);

		printf("-> %" PRIu64 "\n", (uint64_t)p);
		vmem_dump(vm, (void *)printf);
		if (p == VMEM_ADDR_NULL) {

			reg = realloc(reg, sizeof(*reg) * nreg);

		} else if (nreg != 0) {

			r = &reg[rand() % nreg];
			printf("=== free %" PRIu64 ", %" PRIu64 "\n",
			    (uint64_t)r->p, (uint64_t)r->sz);

				vmem_xfree(vm, r->p, r->sz);

				vmem_free(vm, r->p, r->sz);

			vmem_dump(vm, (void *)printf);

	printf("total=%" PRIu64 "\n", (uint64_t)total);

	fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
	    (uint64_t)total, nalloc, nfree);

#endif /* defined(UNITTEST) */