2 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions
6 * 1. Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * 2. Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * 4. Neither the name of the University nor the names of its contributors
12 * may be used to endorse or promote products derived from this software
13 * without specific prior written permission.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
31 * This module implements a general bitmap allocator/deallocator. The
32 * allocator eats around 2 bits per 'block'. The module does not
33 * try to interpret the meaning of a 'block' other then to return
34 * SWAPBLK_NONE on an allocation failure.
36 * A radix tree is used to maintain the bitmap. Two radix constants are
37 * involved: One for the bitmaps contained in the leaf nodes (typically
38 * 32), and one for the meta nodes (typically 16). Both meta and leaf
39 * nodes have a hint field. This field gives us a hint as to the largest
40 * free contiguous range of blocks under the node. It may contain a
41 * value that is too high, but will never contain a value that is too
42 * low. When the radix tree is searched, allocation failures in subtrees
45 * The radix tree also implements two collapsed states for meta nodes:
46 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
47 * in either of these two states, all information contained underneath
48 * the node is considered stale. These states are used to optimize
49 * allocation and freeing operations.
51 * The hinting greatly increases code efficiency for allocations while
52 * the general radix structure optimizes both allocations and frees. The
53 * radix tree should be able to operate well no matter how much
54 * fragmentation there is and no matter how large a bitmap is used.
56 * Unlike the rlist code, the blist code wires all necessary memory at
57 * creation time. Neither allocations nor frees require interaction with
58 * the memory subsystem. In contrast, the rlist code may allocate memory
59 * on an rlist_free() call. The non-blocking features of the blist code
60 * are used to great advantage in the swap code (vm/nswap_pager.c). The
61 * rlist code uses a little less overall memory then the blist code (but
62 * due to swap interleaving not all that much less), but the blist code
63 * scales much, much better.
65 * LAYOUT: The radix tree is layed out recursively using a
66 * linear array. Each meta node is immediately followed (layed out
67 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This
68 * is a recursive structure but one that can be easily scanned through
69 * a very simple 'skip' calculation. In order to support large radixes,
70 * portions of the tree may reside outside our memory allocation. We
71 * handle this with an early-termination optimization (when bighint is
72 * set to -1) on the scan. The memory allocation is only large enough
73 * to cover the number of blocks requested at creation time even if it
74 * must be encompassed in larger root-node radix.
76 * NOTE: the allocator cannot currently allocate more then
77 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
78 * large' if you try. This is an area that could use improvement. The
79 * radix is large enough that this restriction does not effect the swap
80 * system, though. Currently only the allocation code is effected by
81 * this algorithmic unfeature. The freeing code can handle arbitrary
84 * This code can be compiled stand-alone for debugging.
88 * NOTE: a few modifications is made in the orginal FreeBSD code:
89 * 1. change the name of some variables/constants
90 * 2. discard the code handling ALL_FREE and ALL_ALLOCATED state
91 * 3. allocate more than 32 slots won't lead to kernel panic
92 * 4. remove all the code for debugging
100 #include <util/RadixBitmap.h>
102 #define TERMINATOR -1
106 radix_bitmap_init(radix_node
*node
, uint32 radix
, uint32 skip
, uint32 slots
)
109 int32 big_hint
= radix
< slots
? radix
: slots
;
112 if (radix
== BITMAP_RADIX
) {
114 node
->big_hint
= big_hint
;
115 if (big_hint
== BITMAP_RADIX
)
118 node
->u
.bitmap
= (bitmap_t
)-1 << big_hint
;
125 node
->big_hint
= big_hint
;
126 node
->u
.available
= big_hint
;
130 uint32 next_skip
= skip
/ NODE_RADIX
;
133 for (i
= 1; i
<= skip
; i
+= next_skip
) {
134 if (slots
>= radix
) {
135 index
= i
+ radix_bitmap_init(node
? &node
[i
] : NULL
,
136 radix
, next_skip
- 1, radix
);
138 } else if (slots
> 0) {
139 index
= i
+ radix_bitmap_init(node
? &node
[i
] : NULL
,
140 radix
, next_skip
- 1, slots
);
142 } else { // add a terminator
144 node
[i
].big_hint
= TERMINATOR
;
157 radix_bitmap_create(uint32 slots
)
159 uint32 radix
= BITMAP_RADIX
;
162 while (radix
< slots
) {
164 skip
= (skip
+ 1) * NODE_RADIX
;
167 radix_bitmap
*bmp
= (radix_bitmap
*)malloc(sizeof(radix_bitmap
));
173 bmp
->free_slots
= slots
;
174 bmp
->root_size
= 1 + radix_bitmap_init(NULL
, radix
, skip
, slots
);
176 bmp
->root
= (radix_node
*)malloc(bmp
->root_size
* sizeof(radix_node
));
177 if (bmp
->root
== NULL
) {
182 radix_bitmap_init(bmp
->root
, radix
, skip
, slots
);
189 radix_bitmap_destroy(radix_bitmap
*bmp
)
197 radix_leaf_alloc(radix_node
*leaf
, radix_slot_t slotIndex
, int32 count
)
199 if (count
<= (int32
)BITMAP_RADIX
) {
200 bitmap_t bitmap
= ~leaf
->u
.bitmap
;
201 uint32 n
= BITMAP_RADIX
- count
;
202 bitmap_t mask
= (bitmap_t
)-1 >> n
;
203 for (uint32 j
= 0; j
<= n
; j
++) {
204 if ((bitmap
& mask
) == mask
) {
205 leaf
->u
.bitmap
|= mask
;
206 return (slotIndex
+ j
);
212 // we could not allocate count here, update big_hint
213 if (leaf
->big_hint
>= count
)
214 leaf
->big_hint
= count
- 1;
215 return RADIX_SLOT_NONE
;
220 radix_node_alloc(radix_node
*node
, radix_slot_t slotIndex
, int32 count
,
221 uint32 radix
, uint32 skip
)
223 uint32 next_skip
= skip
/ NODE_RADIX
;
226 for (uint32 i
= 1; i
<= skip
; i
+= next_skip
) {
227 if (node
[i
].big_hint
== TERMINATOR
) // TERMINATOR
230 if (count
<= node
[i
].big_hint
) {
231 radix_slot_t addr
= RADIX_SLOT_NONE
;
233 addr
= radix_leaf_alloc(&node
[i
], slotIndex
, count
);
235 addr
= radix_node_alloc(&node
[i
], slotIndex
, count
, radix
,
237 if (addr
!= RADIX_SLOT_NONE
) {
238 node
->u
.available
-= count
;
239 if (node
->big_hint
> node
->u
.available
)
240 node
->big_hint
= node
->u
.available
;
248 // we could not allocate count in the subtree, update big_hint
249 if (node
->big_hint
>= count
)
250 node
->big_hint
= count
- 1;
251 return RADIX_SLOT_NONE
;
256 radix_bitmap_alloc(radix_bitmap
*bmp
, uint32 count
)
258 radix_slot_t addr
= RADIX_SLOT_NONE
;
260 if (bmp
->radix
== BITMAP_RADIX
)
261 addr
= radix_leaf_alloc(bmp
->root
, 0, count
);
263 addr
= radix_node_alloc(bmp
->root
, 0, count
, bmp
->radix
, bmp
->skip
);
265 if (addr
!= RADIX_SLOT_NONE
)
266 bmp
->free_slots
-= count
;
273 radix_leaf_dealloc(radix_node
*leaf
, radix_slot_t slotIndex
, uint32 count
)
275 uint32 n
= slotIndex
& (BITMAP_RADIX
- 1);
276 bitmap_t mask
= ((bitmap_t
)-1 >> (BITMAP_RADIX
- count
- n
))
277 & ((bitmap_t
)-1 << n
);
278 leaf
->u
.bitmap
&= ~mask
;
280 leaf
->big_hint
= BITMAP_RADIX
;
285 radix_node_dealloc(radix_node
*node
, radix_slot_t slotIndex
, uint32 count
,
286 uint32 radix
, uint32 skip
, radix_slot_t index
)
288 node
->u
.available
+= count
;
290 uint32 next_skip
= skip
/ NODE_RADIX
;
293 uint32 i
= (slotIndex
- index
) / radix
;
295 i
= i
* next_skip
+ 1;
297 while (i
<= skip
&& index
< slotIndex
+ count
) {
298 uint32 v
= index
+ radix
- slotIndex
;
303 radix_leaf_dealloc(&node
[i
], slotIndex
, v
);
305 radix_node_dealloc(&node
[i
], slotIndex
, v
, radix
,
306 next_skip
- 1, index
);
308 if (node
->big_hint
< node
[i
].big_hint
)
309 node
->big_hint
= node
[i
].big_hint
;
320 radix_bitmap_dealloc(radix_bitmap
*bmp
, radix_slot_t slotIndex
, uint32 count
)
322 if (bmp
->radix
== BITMAP_RADIX
)
323 radix_leaf_dealloc(bmp
->root
, slotIndex
, count
);
325 radix_node_dealloc(bmp
->root
, slotIndex
, count
, bmp
->radix
,
328 bmp
->free_slots
+= count
;