/*	$NetBSD: uvm_pglist.c,v 1.44 2009/03/09 09:53:55 reinoud Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_pglist.c: pglist functions
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.44 2009/03/09 09:53:55 reinoud Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#ifdef VM_PAGE_ALLOC_MEMORY_STATS
#define	STAT_INCR(v)	(v)++
#define	STAT_DECR(v)	do { \
		if ((v) == 0) \
			printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
		else \
			(v)--; \
	} while (/*CONSTCOND*/ 0)
u_long	uvm_pglistalloc_npages;
#else
#define	STAT_INCR(v)
#define	STAT_DECR(v)
#endif
/*
 * uvm_pglistalloc: allocate a list of pages
 *
 * => allocated pages are placed onto an rlist.  rlist is
 *    initialized by uvm_pglistalloc.
 * => returns 0 on success or errno on failure
 * => implementation allocates a single segment if any constraints are
 *    imposed by call arguments.
 * => doesn't take into account clean non-busy pages on inactive list
 *    that could be used(?)
 * => params:
 *	size		the size of the allocation, rounded to page size.
 *	low		the low address of the allowed allocation range.
 *	high		the high address of the allowed allocation range.
 *	alignment	memory must be aligned to this power-of-two boundary.
 *	boundary	no segment in the allocation may cross this
 *			power-of-two boundary (relative to zero).
 */
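
/*
 * Example (illustrative sketch only, not compiled in): a hypothetical
 * caller requesting four physically contiguous pages below 16MB,
 * aligned to 64KB, e.g. for a DMA buffer.  The names "mlist" and
 * "error" are local to this sketch.
 *
 *	struct pglist mlist;
 *	int error;
 *
 *	error = uvm_pglistalloc(4 * PAGE_SIZE, 0, 0x1000000,
 *	    64 * 1024, 0, &mlist, 1, 1);
 *	if (error == 0) {
 *		(map and use the pages, then release them with)
 *		uvm_pglistfree(&mlist);
 *	}
 */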
/*
 * uvm_pglist_add: unlink one free page from the free lists and append
 * it to rlist.  Called with the free page queue lock held.
 */
static void
uvm_pglist_add(struct vm_page *pg, struct pglist *rlist)
{
	int free_list, color, pgflidx;
#ifdef DEBUG
	struct vm_page *tp;
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

#if PGFL_NQUEUES != 2
#error uvm_pglistalloc needs to be updated
#endif

	free_list = uvm_page_lookup_freelist(pg);
	color = VM_PGCOLOR_BUCKET(pg);
	pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN;
#ifdef DEBUG
	for (tp = LIST_FIRST(&uvm.page_free[
		free_list].pgfl_buckets[color].pgfl_queues[pgflidx]);
	     tp != NULL;
	     tp = LIST_NEXT(tp, pageq.list)) {
		if (tp == pg)
			break;
	}
	if (tp == NULL)
		panic("uvm_pglistalloc: page not on freelist");
#endif
	LIST_REMOVE(pg, pageq.list);	/* global */
	LIST_REMOVE(pg, listq.list);	/* cpu */
	uvmexp.free--;
	if (pg->flags & PG_ZERO)
		uvmexp.zeropages--;
	VM_FREE_PAGE_TO_CPU(pg)->pages[pgflidx]--;
	pg->flags = PG_CLEAN;
	pg->pqflags = 0;
	pg->uobject = NULL;
	pg->uanon = NULL;
	TAILQ_INSERT_TAIL(rlist, pg, pageq.queue);
	STAT_INCR(uvm_pglistalloc_npages);
}

/*
 * uvm_pglistalloc_c_ps: try to grab "num" contiguous pages satisfying
 * the low/high/alignment/boundary constraints from one physical segment.
 */
static int
uvm_pglistalloc_c_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    paddr_t alignment, paddr_t boundary, struct pglist *rlist)
{
	int try, limit, tryidx, end, idx;
	struct vm_page *pgs;
	int pagemask;
#ifdef DEBUG
	paddr_t idxpa, lastidxpa;
	int cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: contig %d pgs from psi %ld\n", num,
	    (long)(ps - vm_physmem));
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

	try = roundup(max(atop(low), ps->avail_start), atop(alignment));
	limit = min(atop(high), ps->avail_end);
	pagemask = ~((boundary >> PAGE_SHIFT) - 1);

	for (;;) {
		if (try + num > limit) {
			/*
			 * We've run past the allowable range.
			 */
			return (0); /* FAIL */
		}
		if (boundary != 0 &&
		    ((try ^ (try + num - 1)) & pagemask) != 0) {
			/*
			 * Region crosses boundary. Jump to the boundary
			 * just crossed and ensure alignment.
			 */
			try = (try + num - 1) & pagemask;
			try = roundup(try, atop(alignment));
			continue;
		}
#ifdef DEBUG
		/*
		 * Make sure this is a managed physical page.
		 */

		if (vm_physseg_find(try, &cidx) != ps - vm_physmem)
			panic("pgalloc contig: botch1");
		if (cidx != try - ps->start)
			panic("pgalloc contig: botch2");
		if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem)
			panic("pgalloc contig: botch3");
		if (cidx != try - ps->start + num - 1)
			panic("pgalloc contig: botch4");
#endif
		tryidx = try - ps->start;
		end = tryidx + num;
		pgs = ps->pgs;

		/*
		 * Found a suitable starting page.  See if the range is free.
		 */
		for (idx = tryidx; idx < end; idx++) {
			if (VM_PAGE_IS_FREE(&pgs[idx]) == 0)
				break;

#ifdef DEBUG
			idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
			if (idx > tryidx) {
				lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
				if ((lastidxpa + PAGE_SIZE) != idxpa) {
					/*
					 * Region not contiguous.
					 */
					panic("pgalloc contig: botch5");
				}
				if (boundary != 0 &&
				    ((lastidxpa ^ idxpa) & ~(boundary - 1))
				    != 0) {
					/*
					 * Region crosses boundary.
					 */
					panic("pgalloc contig: botch6");
				}
			}
#endif
		}

		if (idx == end)
			break;

		try += atop(alignment);
	}

	/*
	 * we have a chunk of memory that conforms to the requested
	 * constraints.
	 */
	idx = tryidx;
	while (idx < end)
		uvm_pglist_add(&pgs[idx++], rlist);

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num);
#endif
	return (num); /* number of pages allocated */
}
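
/*
 * Note on the search above (illustrative): it is a first-fit linear
 * scan.  Whenever the free-range check fails, "try" advances by
 * atop(alignment) and the window is rescanned, so each segment is
 * examined at most once per alignment step.
 */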

/*
 * uvm_pglistalloc_contig: walk the freelists and physical segments
 * looking for one contiguous run that satisfies the request.
 */
static int
uvm_pglistalloc_contig(int num, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist)
{
	int fl, psi;
	struct vm_physseg *ps;
	int error;

	/* Default to "lose". */
	error = ENOMEM;

	/*
	 * Block all memory allocation and lock the free list.
	 */
	mutex_spin_enter(&uvm_fpageqlock);

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			ps = &vm_physmem[psi];

			if (ps->free_list != fl)
				continue;

			num -= uvm_pglistalloc_c_ps(ps, num, low, high,
			    alignment, boundary, rlist);
			if (num == 0) {
#ifdef PGALLOC_VERBOSE
				printf("pgalloc: %"PRIxMAX"-%"PRIxMAX"\n",
				    (uintmax_t) VM_PAGE_TO_PHYS(
					TAILQ_FIRST(rlist)),
				    (uintmax_t) VM_PAGE_TO_PHYS(
					TAILQ_LAST(rlist, pglist)));
#endif
				error = 0;
				goto out;
			}
		}
	}

out:
	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);
	return (error);
}

/*
 * uvm_pglistalloc_s_ps: grab up to "num" single free pages within
 * [low, high) from one physical segment.
 */
static int
uvm_pglistalloc_s_ps(struct vm_physseg *ps, int num, paddr_t low, paddr_t high,
    struct pglist *rlist)
{
	int todo, limit, try;
	struct vm_page *pg;
#ifdef DEBUG
	int cidx = 0;	/* XXX: GCC */
#endif
#ifdef PGALLOC_VERBOSE
	printf("pgalloc: simple %d pgs from psi %ld\n", num,
	    (long)(ps - vm_physmem));
#endif

	KASSERT(mutex_owned(&uvm_fpageqlock));

	todo = num;
	limit = min(atop(high), ps->avail_end);

	for (try = max(atop(low), ps->avail_start);
	     try < limit; try++) {
#ifdef DEBUG
		if (vm_physseg_find(try, &cidx) != ps - vm_physmem)
			panic("pgalloc simple: botch1");
		if (cidx != (try - ps->start))
			panic("pgalloc simple: botch2");
#endif
		pg = &ps->pgs[try - ps->start];
		if (VM_PAGE_IS_FREE(pg) == 0)
			continue;

		uvm_pglist_add(pg, rlist);
		if (--todo == 0)
			break;
	}

#ifdef PGALLOC_VERBOSE
	printf("got %d pgs\n", num - todo);
#endif
	return (num - todo); /* number of pages allocated */
}

/*
 * uvm_pglistalloc_simple: grab single pages from the freelists until
 * "num" have been allocated, optionally waiting on the pagedaemon.
 */
static int
uvm_pglistalloc_simple(int num, paddr_t low, paddr_t high,
    struct pglist *rlist, int waitok)
{
	int fl, psi, error;
	struct vm_physseg *ps;

	/* Default to "lose". */
	error = ENOMEM;

again:
	/*
	 * Block all memory allocation and lock the free list.
	 */
	mutex_spin_enter(&uvm_fpageqlock);

	/* Are there even any free pages? */
	if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel))
		goto out;

	for (fl = 0; fl < VM_NFREELIST; fl++) {
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
		for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--)
#else
		for (psi = 0 ; psi < vm_nphysseg ; psi++)
#endif
		{
			ps = &vm_physmem[psi];

			if (ps->free_list != fl)
				continue;

			num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist);
			if (num == 0) {
				error = 0;
				goto out;
			}
		}
	}

out:
	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();
	mutex_spin_exit(&uvm_fpageqlock);

	if (error) {
		if (waitok) {
			/* XXX perhaps some time limitation? */
#ifdef DEBUG
			printf("pglistalloc waiting\n");
#endif
			uvm_wait("pglalloc");
			goto again;
		} else
			uvm_pglistfree(rlist);
	}
#ifdef PGALLOC_VERBOSE
	if (!error)
		printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n",
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_FIRST(rlist)),
		    (uintmax_t) VM_PAGE_TO_PHYS(TAILQ_LAST(rlist, pglist)));
#endif
	return (error);
}

int
uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, struct pglist *rlist, int nsegs, int waitok)
{
	int num, res;

	KASSERT((alignment & (alignment - 1)) == 0);
	KASSERT((boundary & (boundary - 1)) == 0);

	/*
	 * Our allocations are always page granularity, so our alignment
	 * must be, too.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (boundary != 0 && boundary < size)
		return (EINVAL);
	num = atop(round_page(size));
	low = roundup(low, alignment);

	TAILQ_INIT(rlist);

	if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) ||
	    (boundary != 0))
		res = uvm_pglistalloc_contig(num, low, high, alignment,
		    boundary, rlist);
	else
		res = uvm_pglistalloc_simple(num, low, high, rlist, waitok);

	return (res);
}
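
/*
 * Illustrative note: the contiguous path is only taken when the caller
 * actually constrains the layout.  A 16KB request (four 4KB pages)
 * with nsegs == 4, alignment == PAGE_SIZE and boundary == 0 is served
 * page-by-page by uvm_pglistalloc_simple(); the same request with
 * nsegs == 1 must come back as one physically contiguous run and goes
 * through uvm_pglistalloc_contig().
 */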

/*
 * uvm_pglistfree: free a list of pages
 *
 * => pages should already be unmapped
 */
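
/*
 * This is the release half of uvm_pglistalloc(); the KASSERT below
 * additionally requires that no page on the list is still on a
 * paging queue.
 */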
void
uvm_pglistfree(struct pglist *list)
{
	struct uvm_cpu *ucpu;
	struct vm_page *pg;
	int index, color, queue;
	bool iszero;

	/*
	 * Lock the free list and free each page.
	 */

	mutex_spin_enter(&uvm_fpageqlock);
	ucpu = curcpu()->ci_data.cpu_uvm;
	while ((pg = TAILQ_FIRST(list)) != NULL) {
		KASSERT(!uvmpdpol_pageisqueued_p(pg));
		TAILQ_REMOVE(list, pg, pageq.queue);
		iszero = (pg->flags & PG_ZERO);
		pg->pqflags = PQ_FREE;
#ifdef DEBUG
		pg->uobject = (void *)0xdeadbeef;
		pg->uanon = (void *)0xdeadbeef;
#endif /* DEBUG */
#ifdef DEBUG
		if (iszero)
			uvm_pagezerocheck(pg);
#endif /* DEBUG */
		index = uvm_page_lookup_freelist(pg);
		color = VM_PGCOLOR_BUCKET(pg);
		queue = iszero ? PGFL_ZEROS : PGFL_UNKNOWN;
		pg->offset = (uintptr_t)ucpu;
		LIST_INSERT_HEAD(&uvm.page_free[index].pgfl_buckets[color].
		    pgfl_queues[queue], pg, pageq.list);
		LIST_INSERT_HEAD(&ucpu->page_free[index].pgfl_buckets[color].
		    pgfl_queues[queue], pg, listq.list);
		uvmexp.free++;
		if (iszero)
			uvmexp.zeropages++;
		ucpu->pages[queue]++;
		STAT_DECR(uvm_pglistalloc_npages);
	}
	if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN])
		ucpu->page_idle_zero = vm_page_zero_enable;
	mutex_spin_exit(&uvm_fpageqlock);
}