4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
33 /* #define DEBUG ON */
37 * kmem_alloc and kmem_free must not be called from interrupt level,
38 * except from software interrupt level. This is because they are
39 * not reentrant, and only block out software interrupts. They take
40 * too long to block any real devices. There is a routine
41 * kmem_free_intr that can be used to free blocks at interrupt level,
42 * but only up to splimp, not higher. This is because kmem_free_intr
43 * only spl's to splimp.
45 * Also, these routines are not that fast, so they should not be used
46 * in very frequent operations (e.g. operations that happen more often
47 * than, say, once every few seconds).
52 * Yet another memory allocator, this one based on a method
53 * described in C.J. Stephenson, "Fast Fits", IBM Sys. Journal
55 * The basic data structure is a "Cartesian" binary tree, in which
56 * nodes are ordered by ascending addresses (thus minimizing free
57 * list insertion time) and block sizes decrease with depth in the
58 * tree (thus minimizing search time for a block of a given size).
60 * In other words, for any node s, letting D(s) denote
61 * the set of descendents of s, we have:
63 * a. addr(D(left(s))) < addr(s) < addr(D(right(s)))
64 * b. len(D(left(s))) <= len(s) >= len(D(right(s)))
67 #include <sys/param.h>
68 #include <sys/sysmacros.h>
69 #include <sys/salib.h>
71 #include <sys/promif.h>
74 * The node header structure.
76 * To reduce storage consumption, a header block is associated with
77 * free blocks only, not allocated blocks.
78 * When a free block is allocated, its header block is put on
79 * a free header block list.
81 * This creates a header space and a free block space.
82 * The left pointer of a header blocks is used to chain free header
86 typedef enum {false, true} bool;
87 typedef struct freehdr
*Freehdr
;
88 typedef struct dblk
*Dblk
;
91 * Description of a header for a free block
92 * Only free blocks have such headers.
95 Freehdr left
; /* Left tree pointer */
96 Freehdr right
; /* Right tree pointer */
97 Dblk block
; /* Ptr to the data block */
98 size_t size
; /* Size of the data block */
101 #define NIL ((Freehdr) 0)
102 #define WORDSIZE sizeof (int)
103 #define SMALLEST_BLK 1 /* Size of smallest block */
106 * Description of a data block.
109 char data
[1]; /* Addr returned to the caller */
113 * weight(x) is the size of a block, in bytes; or 0 if and only if x
114 * is a null pointer. It is the responsibility of kmem_alloc() and
115 * kmem_free() to keep zero-length blocks out of the arena.
118 #define weight(x) ((x) == NIL? 0: (x->size))
119 #define nextblk(p, size) ((Dblk) ((char *)(p) + (size)))
120 #define max(a, b) ((a) < (b)? (b): (a))
122 void *kmem_alloc(size_t, int);
123 void kmem_free(void *ptr
, size_t nbytes
);
124 Freehdr
getfreehdr(void);
125 static bool morecore(size_t);
126 void insert(Dblk p
, size_t len
, Freehdr
*tree
);
127 void freehdr(Freehdr p
);
128 void delete(Freehdr
*p
);
129 static void check_need_to_free(void);
130 extern caddr_t
resalloc(enum RESOURCES
, size_t, caddr_t
, int);
131 extern int splnet(void);
132 extern int splimp(void);
133 extern void splx(int);
136 * Structure containing various info about allocated memory.
138 #define NEED_TO_FREE_SIZE 5
141 Freehdr free_hdr_list
;
145 struct need_to_free
{
148 } need_to_free_list
, need_to_free
[NEED_TO_FREE_SIZE
];
152 struct map
*kernelmap
;
155 static void prtree(Freehdr
, char *);
159 * Initialize kernel memory allocator
166 struct need_to_free
*ntf
;
169 printf("kmem_init entered\n");
173 kmem_info
.free_root
= NIL
;
174 kmem_info
.free_hdr_list
= NULL
;
175 kmem_info
.map
= kernelmap
;
176 kmem_info
.need_to_free_list
.addr
= 0;
177 ntf
= kmem_info
.need_to_free
;
178 for (i
= 0; i
< NEED_TO_FREE_SIZE
; i
++) {
182 printf("kmem_init returning\n");
183 prtree(kmem_info
.free_root
, "kmem_init");
188 * Insert a new node in a cartesian tree or subtree, placing it
189 * in the correct position with respect to the existing nodes.
192 * Starting from the root, a binary search is made for the new
193 * node. If this search were allowed to continue, it would
194 * eventually fail (since there cannot already be a node at the
195 * given address); but in fact it stops when it reaches a node in
196 * the tree which has a length less than that of the new node (or
197 * when it reaches a null tree pointer). The new node is then
198 * inserted at the root of the subtree for which the shorter node
199 * forms the old root (or in place of the null pointer).
204 insert(Dblk p
, /* Ptr to the block to insert */
205 size_t len
, /* Length of new node */
206 Freehdr
*tree
) /* Address of ptr to root */
209 Freehdr
*left_hook
; /* Temp for insertion */
210 Freehdr
*right_hook
; /* Temp for insertion */
215 * Search for the first node which has a weight less
216 * than that of the new node; this will be the
217 * point at which we insert the new node.
220 while (weight(x
) >= len
) {
229 * Perform root insertion. The variable x traces a path through
230 * the tree, and with the help of left_hook and right_hook,
231 * rewrites all links that cross the territory occupied
232 * by p. Note that this improves performance under
236 newhdr
= getfreehdr();
238 left_hook
= &newhdr
->left
;
239 right_hook
= &newhdr
->right
;
249 * The name 'left_hook' is somewhat confusing, since
250 * it is always set to the address of a .right link
251 * field. However, its value is always an address
252 * below (i.e., to the left of) p. Similarly
253 * for right_hook. The values of left_hook and
254 * right_hook converge toward the value of p,
255 * as in a classical binary search.
259 * rewrite link crossing from the left
262 left_hook
= &x
->right
;
266 * rewrite link crossing from the right
269 right_hook
= &x
->left
;
274 *left_hook
= *right_hook
= NIL
; /* clear remaining hooks */
280 * Delete a node from a cartesian tree. p is the address of
281 * a pointer to the node which is to be deleted.
284 * The left and right sons of the node to be deleted define two
285 * subtrees which are to be merged and attached in place of the
286 * deleted node. Each node on the inside edges of these two
287 * subtrees is examined and longer nodes are placed above the
291 * *p is assumed to be non-null.
298 Freehdr left_branch
; /* left subtree of deleted node */
299 Freehdr right_branch
; /* right subtree of deleted node */
302 left_branch
= x
->left
;
303 right_branch
= x
->right
;
305 while (left_branch
!= right_branch
) {
307 * iterate until left branch and right branch are
310 if (weight(left_branch
) >= weight(right_branch
)) {
312 * promote the left branch
315 p
= &left_branch
->right
;
316 left_branch
= left_branch
->right
;
319 * promote the right branch
322 p
= &right_branch
->left
;
323 right_branch
= right_branch
->left
;
332 * Demote a node in a cartesian tree, if necessary, to establish
333 * the required vertical ordering.
336 * The left and right subtrees of the node to be demoted are to
337 * be partially merged and attached in place of the demoted node.
338 * The nodes on the inside edges of these two subtrees are
339 * examined and the longer nodes are placed above the shorter
340 * ones, until a node is reached which has a length no greater
341 * than that of the node being demoted (or until a null pointer
342 * is reached). The node is then attached at this point, and
343 * the remaining subtrees (if any) become its descendants.
346 * a. All the nodes in the tree, including the one to be demoted,
347 * must be correctly ordered horizontally;
348 * b. All the nodes except the one to be demoted must also be
349 * correctly positioned vertically. The node to be demoted
350 * may be already correctly positioned vertically, or it may
351 * have a length which is less than that of one or both of
360 Freehdr x
; /* addr of node to be demoted */
362 Freehdr right_branch
;
366 left_branch
= x
->left
;
367 right_branch
= x
->right
;
370 while (weight(left_branch
) > wx
|| weight(right_branch
) > wx
) {
372 * select a descendant branch for promotion
374 if (weight(left_branch
) >= weight(right_branch
)) {
376 * promote the left branch
379 p
= &left_branch
->right
;
383 * promote the right branch
386 p
= &right_branch
->left
;
391 *p
= x
; /* attach demoted node here */
392 x
->left
= left_branch
;
393 x
->right
= right_branch
;
397 * Allocate a block of storage
400 * The freelist is searched by descending the tree from the root
401 * so that at each decision point the "better fitting" child node
402 * is chosen (i.e., the shorter one, if it is long enough, or
403 * the longer one, otherwise). The descent stops when both
404 * child nodes are too short.
407 * kmem_alloc returns a pointer to the allocated block; a null
408 * pointer indicates storage could not be allocated.
411 * We need to return blocks that are on word boundaries so that callers
412 * that are putting int's into the area will work. Since we allow
413 * arbitrary free'ing, we need a weight function that considers
414 * free blocks starting on an odd boundary special. Allocation is
415 * aligned to 8 byte boundaries (ALIGN).
417 #define ALIGN 8 /* doubleword aligned .. */
418 #define ALIGNMASK (ALIGN-1)
419 #define ALIGNMORE(addr) (ALIGN - ((uintptr_t)(addr) & ALIGNMASK))
422 * If it is empty then weight == 0
423 * If it is aligned then weight == size
425 * if not enough room to align then weight == 0
426 * else weight == aligned size
428 #define mweight(x) ((x) == NIL ? 0 : \
429 ((((uintptr_t)(x)->block) & ALIGNMASK) == 0 ? (x)->size : \
430 (((x)->size <= ALIGNMORE((x)->block)) ? 0 : \
431 (x)->size - ALIGNMORE((x)->block))))
435 kmem_alloc(size_t nbytes
, int kmflag
)
437 Freehdr a
; /* ptr to node to be allocated */
438 Freehdr
*p
; /* address of ptr to node */
443 char *retblock
; /* Address returned to the user */
446 printf("kmem_alloc(nbytes 0x%lx)\n", nbytes
);
454 if (nbytes
< SMALLEST_BLK
) {
455 printf("illegal kmem_alloc call for %lx bytes\n", nbytes
);
456 prom_panic("kmem_alloc");
458 check_need_to_free();
461 * ensure that at least one block is big enough to satisfy
465 if (mweight(kmem_info
.free_root
) <= nbytes
) {
467 * the largest block is not enough.
469 if (!morecore(nbytes
)) {
470 printf("kmem_alloc failed, nbytes %lx\n", nbytes
);
471 prom_panic("kmem_alloc");
476 * search down through the tree until a suitable block is
477 * found. At each decision point, select the better
481 p
= (Freehdr
*) &kmem_info
.free_root
;
484 right_son
= a
->right
;
485 left_weight
= mweight(left_son
);
486 right_weight
= mweight(right_son
);
488 while (left_weight
>= nbytes
|| right_weight
>= nbytes
) {
489 if (left_weight
<= right_weight
) {
490 if (left_weight
>= nbytes
) {
498 if (right_weight
>= nbytes
) {
507 right_son
= a
->right
;
508 left_weight
= mweight(left_son
);
509 right_weight
= mweight(right_son
);
513 * allocate storage from the selected node.
516 if (a
->size
- nbytes
< SMALLEST_BLK
) {
518 * not big enough to split; must leave at least
519 * a dblk's worth of space.
521 retblock
= a
->block
->data
;
526 * split the node, allocating nbytes from the top.
527 * Remember we've already accounted for the
528 * allocated node's header space.
532 if ((uintptr_t)a
->block
->data
& ALIGNMASK
) {
534 if (a
->size
<= ALIGNMORE(a
->block
->data
))
535 prom_panic("kmem_alloc: short block allocated");
536 size
= nbytes
+ ALIGNMORE(a
->block
->data
);
538 x
->size
= ALIGNMORE(a
->block
->data
);
542 * the node pointed to by *p has become smaller;
543 * move it down to its appropriate place in
548 retblock
= a
->block
->data
+ ALIGNMORE(a
->block
->data
);
549 if (a
->size
> size
) {
550 kmem_free((caddr_t
)nextblk(a
->block
, size
),
551 (size_t)(a
->size
- size
));
555 x
->block
= nextblk(a
->block
, nbytes
);
556 x
->size
= a
->size
- nbytes
;
560 * the node pointed to by *p has become smaller;
561 * move it down to its appropriate place in
566 retblock
= a
->block
->data
;
571 prtree(kmem_info
.free_root
, "kmem_alloc");
575 bzero(retblock
, nbytes
);
577 printf("kmem_alloc bzero complete - returning %p\n", retblock
);
584 * Return a block to the free space tree.
587 * Starting at the root, search for and coalesce free blocks
588 * adjacent to one given. When the appropriate place in the
589 * tree is found, insert the given block.
591 * Do some sanity checks to avoid total confusion in the tree.
592 * If the block has already been freed, prom_panic.
593 * If the ptr is not from the arena, prom_panic.
596 kmem_free(void *ptr
, size_t nbytes
)
598 Freehdr
*np
; /* For deletion from free list */
599 Freehdr neighbor
; /* Node to be coalesced */
600 char *neigh_block
; /* Ptr to potential neighbor */
601 size_t neigh_size
; /* Size of potential neighbor */
605 printf("kmem_free (ptr %p nbytes %lx)\n", ptr
, nbytes
);
606 prtree(kmem_info
.free_root
, "kmem_free");
614 prom_panic("kmem_free of 0");
619 * Search the tree for the correct insertion point for this
620 * node, coalescing adjacent free blocks along the way.
622 np
= &kmem_info
.free_root
;
624 while (neighbor
!= NIL
) {
625 neigh_block
= (char *)neighbor
->block
;
626 neigh_size
= neighbor
->size
;
627 if ((char *)ptr
< neigh_block
) {
628 if ((char *)ptr
+ nbytes
== neigh_block
) {
630 * Absorb and delete right neighbor
632 nbytes
+= neigh_size
;
634 } else if ((char *)ptr
+ nbytes
> neigh_block
) {
636 * The block being freed overlaps
637 * another block in the tree. This
640 printf("kmem_free: free block overlap %p+%lx"
641 " over %p\n", (void *)ptr
, nbytes
,
642 (void *)neigh_block
);
643 prom_panic("kmem_free: free block overlap");
648 np
= &neighbor
->left
;
650 } else if ((char *)ptr
> neigh_block
) {
651 if (neigh_block
+ neigh_size
== ptr
) {
653 * Absorb and delete left neighbor
656 nbytes
+= neigh_size
;
658 } else if (neigh_block
+ neigh_size
> (char *)ptr
) {
660 * This block has already been freed
662 prom_panic("kmem_free block already free");
665 * search to the right
667 np
= &neighbor
->right
;
671 * This block has already been freed
672 * as "ptr == neigh_block"
674 prom_panic("kmem_free: block already free as neighbor");
681 * Insert the new node into the free space tree
683 insert((Dblk
) ptr
, nbytes
, &kmem_info
.free_root
);
685 printf("exiting kmem_free\n");
686 prtree(kmem_info
.free_root
, "kmem_free");
692 * Sigh. We include a header file which the kernel
693 * uses to declare (one of its many) kmem_free prototypes.
694 * In order not to use the kernel's namespace, then, we must
695 * define another name here for use by boot.
698 bkmem_alloc(size_t size
)
700 return (kmem_alloc(size
, 0));
704 * Boot's kmem_alloc is really kmem_zalloc().
707 bkmem_zalloc(size_t size
)
709 return (kmem_alloc(size
, 0));
713 bkmem_free(void *p
, size_t bytes
)
719 check_need_to_free(void)
722 struct need_to_free
*ntf
;
729 ntf
= &kmem_info
.need_to_free_list
;
732 nbytes
= ntf
->nbytes
;
733 *ntf
= *(struct need_to_free
*)ntf
->addr
;
735 kmem_free(addr
, nbytes
);
738 ntf
= kmem_info
.need_to_free
;
739 for (i
= 0; i
< NEED_TO_FREE_SIZE
; i
++) {
742 nbytes
= ntf
[i
].nbytes
;
745 kmem_free(addr
, nbytes
);
753 * Add a block of at least nbytes to the free space tree.
756 * true if at least nbytes can be allocated
760 * free space (delimited by the static variable ubound) is
761 * extended by an amount determined by rounding nbytes up to
762 * a multiple of the system page size.
766 morecore(size_t nbytes
)
768 enum RESOURCES type
= RES_BOOTSCRATCH
;
771 printf("morecore(nbytes 0x%lx)\n", nbytes
);
775 nbytes
= roundup(nbytes
, PAGESIZE
);
776 p
= (Dblk
) resalloc(type
, nbytes
, (caddr_t
)0, 0);
780 kmem_free((caddr_t
)p
, nbytes
);
782 printf("morecore() returing, p = %p\n", p
);
789 * Get a free block header
790 * There is a list of available free block headers.
791 * When the list is empty, allocate another pagefull.
799 printf("getfreehdr()\n");
802 if (kmem_info
.free_hdr_list
!= NIL
) {
803 r
= kmem_info
.free_hdr_list
;
804 kmem_info
.free_hdr_list
= kmem_info
.free_hdr_list
->left
;
806 r
= (Freehdr
)resalloc(RES_BOOTSCRATCH
, PAGESIZE
, (caddr_t
)0, 0);
808 prom_panic("getfreehdr");
810 for (n
= 1; n
< PAGESIZE
/ sizeof (*r
); n
++) {
815 printf("getfreehdr: freed %x headers\n", n
);
816 printf("getfreehdr: returning %p\n", r
);
822 * Free a free block header
823 * Add it to the list of available headers.
830 printf("freehdr(%p)\n", p
);
832 p
->left
= kmem_info
.free_hdr_list
;
835 kmem_info
.free_hdr_list
= p
;
840 * Diagnostic routines
842 static int depth
= 0;
845 prtree(Freehdr p
, char *cp
)
849 printf("prtree(p %p cp %s)\n", p
, cp
);
853 prtree(p
->left
, NULL
);
856 for (n
= 0; n
< depth
; n
++) {
860 "(%p): (left = %p, right = %p, block = %p, size = %lx)\n",
861 p
, p
->left
, p
->right
, p
->block
, p
->size
);
864 prtree(p
->right
, NULL
);