/*	$NetBSD: uvm_page.c,v 1.151 2009/08/18 19:08:39 thorpej Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.151 2009/08/18 19:08:39 thorpej Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sched.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>
#include <uvm/uvm_pdpolicy.h>
/*
 * global vars... XXXCDC: move to uvm. structure.
 */

/*
 * physical memory config is stored in vm_physmem.
 */

struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */

/*
 * Some supported CPUs in a given architecture don't support all
 * of the things necessary to do idle page zero'ing efficiently.
 * We therefore provide a way to disable it from machdep code here.
 *
 * XXX disabled until we can find a way to do this without causing
 * problems for either CPU caches or DMA latency.
 */
bool vm_page_zero_enable = false;

/*
 * number of pages per-CPU to reserve for the kernel.
 */
int vm_page_reserve_kernel = 5;

/*
 * physical memory size;
 */

/*
 * these variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of uvm_pageboot_alloc
 * and pmap_startup here also uses them internally.
 */

static vaddr_t virtual_space_start;
static vaddr_t virtual_space_end;

/*
 * we allocate an initial number of page colors in uvm_page_init(),
 * and remember them.  We may re-color pages as cache sizes are
 * discovered during the autoconfiguration phase.  But we can never
 * free the initial set of buckets, since they are allocated using
 * uvm_pageboot_alloc().
 */

static bool have_recolored_pages /* = false */;

MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");

vaddr_t uvm_zerocheckkva;
static void uvm_pageinsert(struct vm_page *);
static void uvm_pageremove(struct vm_page *);

/*
 * per-object tree of pages
 */
uvm_page_compare_nodes(const struct rb_node *n1, const struct rb_node *n2)

    const struct vm_page *pg1 = (const void *)n1;
    const struct vm_page *pg2 = (const void *)n2;
    const voff_t a = pg1->offset;
    const voff_t b = pg2->offset;

uvm_page_compare_key(const struct rb_node *n, const void *key)

    const struct vm_page *pg = (const void *)n;
    const voff_t a = pg->offset;
    const voff_t b = *(const voff_t *)key;

const struct rb_tree_ops uvm_page_tree_ops = {
    .rbto_compare_nodes = uvm_page_compare_nodes,
    .rbto_compare_key = uvm_page_compare_key,
/*
 * uvm_pageinsert: insert a page in the object.
 *
 * => caller must lock object
 * => caller must lock page queues
 * => call should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page *where)

    KASSERT(uobj == pg->uobject);
    KASSERT(mutex_owned(&uobj->vmobjlock));
    KASSERT((pg->flags & PG_TABLED) == 0);
    KASSERT(where == NULL || (where->flags & PG_TABLED));
    KASSERT(where == NULL || (where->uobject == uobj));

    if (UVM_OBJ_IS_VNODE(uobj)) {
        if (uobj->uo_npages == 0) {
            struct vnode *vp = (struct vnode *)uobj;
        if (UVM_OBJ_IS_VTEXT(uobj)) {
            atomic_inc_uint(&uvmexp.execpages);
            atomic_inc_uint(&uvmexp.filepages);
    } else if (UVM_OBJ_IS_AOBJ(uobj)) {
        atomic_inc_uint(&uvmexp.anonpages);

    TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue);
    TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
    pg->flags |= PG_TABLED;

uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)

    KASSERT(uobj == pg->uobject);
    success = rb_tree_insert_node(&uobj->rb_tree, &pg->rb_node);

uvm_pageinsert(struct vm_page *pg)

    struct uvm_object *uobj = pg->uobject;

    uvm_pageinsert_tree(uobj, pg);
    uvm_pageinsert_list(uobj, pg, NULL);
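
/*
 * Example (sketch, not compiled): how the insertion contract documented
 * above is typically satisfied by a caller.  "uobj" is assumed to be a
 * locked uvm_object and "off" a page-aligned offset; the helper name
 * example_insert_page() is hypothetical and exists only for illustration.
 */
#if 0
static void
example_insert_page(struct uvm_object *uobj, struct vm_page *pg, voff_t off)
{

	KASSERT(mutex_owned(&uobj->vmobjlock));

	/* the page's identity must be set before uvm_pageinsert() runs */
	pg->uobject = uobj;
	pg->offset = off;
	uvm_pageinsert(pg);	/* adds pg to uobj's rb_tree and memq */
}
#endif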
/*
 * uvm_page_remove: remove page from object.
 *
 * => caller must lock object
 * => caller must lock page queues
 */

uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg)

    KASSERT(uobj == pg->uobject);
    KASSERT(mutex_owned(&uobj->vmobjlock));
    KASSERT(pg->flags & PG_TABLED);

    if (UVM_OBJ_IS_VNODE(uobj)) {
        if (uobj->uo_npages == 1) {
            struct vnode *vp = (struct vnode *)uobj;
        if (UVM_OBJ_IS_VTEXT(uobj)) {
            atomic_dec_uint(&uvmexp.execpages);
            atomic_dec_uint(&uvmexp.filepages);
    } else if (UVM_OBJ_IS_AOBJ(uobj)) {
        atomic_dec_uint(&uvmexp.anonpages);

    /* object should be locked */
    TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
    pg->flags &= ~PG_TABLED;

uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)

    KASSERT(uobj == pg->uobject);
    rb_tree_remove_node(&uobj->rb_tree, &pg->rb_node);

uvm_pageremove(struct vm_page *pg)

    struct uvm_object *uobj = pg->uobject;

    uvm_pageremove_tree(uobj, pg);
    uvm_pageremove_list(uobj, pg);
uvm_page_init_buckets(struct pgfreelist *pgfl)

    for (color = 0; color < uvmexp.ncolors; color++) {
        for (i = 0; i < PGFL_NQUEUES; i++) {
            LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
/*
 * uvm_page_init: init the page system. called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 */

uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)

    vsize_t freepages, pagecount, bucketcount, n;
    struct pgflbucket *bucketarray, *cpuarray;
    struct vm_page *pagearray;

    CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *));

    /*
     * init the page queues and page queue locks, except the free
     * list; we allocate that later (with the initial vm_page
     */

    curcpu()->ci_data.cpu_uvm = &uvm.cpus[0];
    mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE);
    mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);

    /*
     * allocate vm_page structures.
     *
     * before calling this function the MD code is expected to register
     * some free RAM with the uvm_page_physload() function. our job
     * now is to allocate vm_page structures for this memory.
     */

    if (vm_nphysseg == 0)
        panic("uvm_page_bootstrap: no memory pre-allocated");

    /*
     * first calculate the number of free pages...
     *
     * note that we use start/end rather than avail_start/avail_end.
     * this allows us to allocate extra vm_page structures in case we
     * want to return some memory to the pool after booting.
     */

    for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
        freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);

    /*
     * Let MD code initialize the number of colors, or default
     * to 1 color if MD code doesn't care.
     */
    if (uvmexp.ncolors == 0)
        uvmexp.ncolors = 1;
    uvmexp.colormask = uvmexp.ncolors - 1;

    /*
     * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
     * use. for each page of memory we use we need a vm_page structure.
     * thus, the total number of pages we can use is the total size of
     * the memory divided by the PAGE_SIZE plus the size of the vm_page
     * structure. we add one to freepages as a fudge factor to avoid
     * truncation errors (since we can only allocate in terms of whole
     */

    bucketcount = uvmexp.ncolors * VM_NFREELIST;
    pagecount = ((freepages + 1) << PAGE_SHIFT) /
        (PAGE_SIZE + sizeof(struct vm_page));

    bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
        sizeof(struct pgflbucket) * 2) + (pagecount *
        sizeof(struct vm_page)));
    cpuarray = bucketarray + bucketcount;
    pagearray = (struct vm_page *)(bucketarray + bucketcount * 2);

    for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
        uvm.page_free[lcv].pgfl_buckets =
            (bucketarray + (lcv * uvmexp.ncolors));
        uvm_page_init_buckets(&uvm.page_free[lcv]);
        uvm.cpus[0].page_free[lcv].pgfl_buckets =
            (cpuarray + (lcv * uvmexp.ncolors));
        uvm_page_init_buckets(&uvm.cpus[0].page_free[lcv]);
    memset(pagearray, 0, pagecount * sizeof(struct vm_page));

    /*
     * init the vm_page structures and put them in the correct place.
     */

    for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
        n = vm_physmem[lcv].end - vm_physmem[lcv].start;

        /* set up page array pointers */
        vm_physmem[lcv].pgs = pagearray;
        vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);

        /* init and free vm_pages (we've already zeroed them) */
        paddr = ptoa(vm_physmem[lcv].start);
        for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
            vm_physmem[lcv].pgs[i].phys_addr = paddr;
#ifdef __HAVE_VM_PAGE_MD
            VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
#endif
            if (atop(paddr) >= vm_physmem[lcv].avail_start &&
                atop(paddr) <= vm_physmem[lcv].avail_end) {
                /* add page to free pool */
                uvm_pagefree(&vm_physmem[lcv].pgs[i]);

    /*
     * pass up the values of virtual_space_start and
     * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
     */

    *kvm_startp = round_page(virtual_space_start);
    *kvm_endp = trunc_page(virtual_space_end);

    /*
     * steal kva for uvm_pagezerocheck().
     */
    uvm_zerocheckkva = *kvm_startp;
    *kvm_startp += PAGE_SIZE;

    /*
     * init various thresholds.
     */

    uvmexp.reserve_pagedaemon = 1;
    uvmexp.reserve_kernel = vm_page_reserve_kernel;

    /*
     * determine if we should zero pages in the idle loop.
     */

    uvm.cpus[0].page_idle_zero = vm_page_zero_enable;

    uvm.page_init_done = true;
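
/*
 * Example (sketch, not compiled): the bootstrap ordering that the comments
 * above assume.  Machine-dependent startup code registers its RAM with
 * uvm_page_physload() before machine-independent code calls uvm_init(),
 * which in turn calls uvm_page_init().  The function name and the physical
 * address values below are made-up placeholders.
 */
#if 0
void
example_md_bootstrap(void)
{
	paddr_t first_avail = atop(0x00400000);	/* placeholder pfn */
	paddr_t last = atop(0x10000000);	/* placeholder pfn */

	uvm_setpagesize();
	uvm_page_physload(first_avail, last, first_avail, last,
	    VM_FREELIST_DEFAULT);
	/* ... later, MI startup calls uvm_init(), which calls uvm_page_init() */
}
#endif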
/*
 * uvm_setpagesize: set the page size
 *
 * => sets page_shift and page_mask from uvmexp.pagesize.
 */

uvm_setpagesize(void)

    /*
     * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
     * to be a constant (indicated by being a non-zero value).
     */
    if (uvmexp.pagesize == 0) {
        if (PAGE_SIZE == 0)
            panic("uvm_setpagesize: uvmexp.pagesize not set");
        uvmexp.pagesize = PAGE_SIZE;
    uvmexp.pagemask = uvmexp.pagesize - 1;
    if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
        panic("uvm_setpagesize: page size not a power of two");
    for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
        if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
            break;
/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
 */

uvm_pageboot_alloc(vsize_t size)

    static bool initialized = false;
#if !defined(PMAP_STEAL_MEMORY)

    /*
     * on first call to this function, initialize ourselves.
     */
    if (initialized == false) {
        pmap_virtual_space(&virtual_space_start, &virtual_space_end);

        /* round it the way we like it */
        virtual_space_start = round_page(virtual_space_start);
        virtual_space_end = trunc_page(virtual_space_end);

    /* round to page size */
    size = round_page(size);

#if defined(PMAP_STEAL_MEMORY)

    /*
     * defer bootstrap allocation to MD code (it may want to allocate
     * from a direct-mapped segment). pmap_steal_memory should adjust
     * virtual_space_start/virtual_space_end if necessary.
     */

    addr = pmap_steal_memory(size, &virtual_space_start,

#else /* !PMAP_STEAL_MEMORY */

    /*
     * allocate virtual memory for this request
     */
    if (virtual_space_start == virtual_space_end ||
        (virtual_space_end - virtual_space_start) < size)
        panic("uvm_pageboot_alloc: out of virtual space");

    addr = virtual_space_start;

#ifdef PMAP_GROWKERNEL
    /*
     * If the kernel pmap can't map the requested space,
     * then allocate more resources for it.
     */
    if (uvm_maxkaddr < (addr + size)) {
        uvm_maxkaddr = pmap_growkernel(addr + size);
        if (uvm_maxkaddr < (addr + size))
            panic("uvm_pageboot_alloc: pmap_growkernel() failed");

    virtual_space_start += size;

    /*
     * allocate and mapin physical pages to back new virtual pages
     */

    for (vaddr = round_page(addr) ; vaddr < addr + size ;
        vaddr += PAGE_SIZE) {

        if (!uvm_page_physget(&paddr))
            panic("uvm_pageboot_alloc: out of memory");

        /*
         * Note this memory is no longer managed, so using
         * pmap_kenter is safe.
         */
        pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
    pmap_update(pmap_kernel());
#endif /* PMAP_STEAL_MEMORY */
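
/*
 * Example (sketch, not compiled): uvm_pageboot_alloc() is only usable during
 * bootstrap (uvm_page_physget() panics once uvm.page_init_done is set), and
 * the memory it hands out is permanent; there is no corresponding free
 * routine.  The helper name and the size below are placeholders.
 */
#if 0
static char *example_boot_table;

void
example_boot_alloc(void)
{

	/* carve out a permanent, never-freed table early in boot */
	example_boot_table = (char *)uvm_pageboot_alloc(4 * PAGE_SIZE);
}
#endif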
#if !defined(PMAP_STEAL_MEMORY)
/*
 * uvm_page_physget: "steal" one page from the vm_physmem structure.
 *
 * => attempt to allocate it off the end of a segment in which the "avail"
 *    values match the start/end values.  if we can't do that, then we
 *    will advance both values (making them equal, and removing some
 *    vm_page structures from the non-avail area).
 * => return false if out of memory.
 */

/* subroutine: try to allocate from memory chunks on the specified freelist */
static bool uvm_page_physget_freelist(paddr_t *, int);

uvm_page_physget_freelist(paddr_t *paddrp, int freelist)

    /* pass 1: try allocating from a matching end */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
    for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
    for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)

        if (uvm.page_init_done == true)
            panic("uvm_page_physget: called _after_ bootstrap");

        if (vm_physmem[lcv].free_list != freelist)
            continue;

        if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
            vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
            *paddrp = ptoa(vm_physmem[lcv].avail_start);
            vm_physmem[lcv].avail_start++;
            vm_physmem[lcv].start++;
            /* nothing left?   nuke it */
            if (vm_physmem[lcv].avail_start ==
                vm_physmem[lcv].end) {
                if (vm_nphysseg == 1)
                    panic("uvm_page_physget: out of memory!");
                for (x = lcv ; x < vm_nphysseg ; x++)
                    vm_physmem[x] = vm_physmem[x+1];

        if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
            vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
            *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
            vm_physmem[lcv].avail_end--;
            vm_physmem[lcv].end--;
            /* nothing left?   nuke it */
            if (vm_physmem[lcv].avail_end ==
                vm_physmem[lcv].start) {
                if (vm_nphysseg == 1)
                    panic("uvm_page_physget: out of memory!");
                for (x = lcv ; x < vm_nphysseg ; x++)
                    vm_physmem[x] = vm_physmem[x+1];

    /* pass2: forget about matching ends, just allocate something */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
    for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
    for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)

        /* any room in this bank? */
        if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
            continue;

        *paddrp = ptoa(vm_physmem[lcv].avail_start);
        vm_physmem[lcv].avail_start++;
        vm_physmem[lcv].start = vm_physmem[lcv].avail_start;

        /* nothing left?   nuke it */
        if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
            if (vm_nphysseg == 1)
                panic("uvm_page_physget: out of memory!");
            for (x = lcv ; x < vm_nphysseg ; x++)
                vm_physmem[x] = vm_physmem[x+1];

    return (false);	/* whoops! */

uvm_page_physget(paddr_t *paddrp)

    /* try in the order of freelist preference */
    for (i = 0; i < VM_NFREELIST; i++)
        if (uvm_page_physget_freelist(paddrp, i) == true)
#endif /* PMAP_STEAL_MEMORY */
/*
 * uvm_page_physload: load physical memory into VM system
 *
 * => all args are PFs
 * => all pages in start/end get vm_page structures
 * => areas marked by avail_start/avail_end get added to the free page pool
 * => we are limited to VM_PHYSSEG_MAX physical memory segments
 */

uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
    paddr_t avail_end, int free_list)

    struct vm_physseg *ps;

    if (uvmexp.pagesize == 0)
        panic("uvm_page_physload: page size not set!");
    if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
        panic("uvm_page_physload: bad free list %d", free_list);
    if (start >= end)
        panic("uvm_page_physload: start >= end");

    if (vm_nphysseg == VM_PHYSSEG_MAX) {
        printf("uvm_page_physload: unable to load physical memory "
        printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
            VM_PHYSSEG_MAX, (long long)start, (long long)end);
        printf("\tincrease VM_PHYSSEG_MAX\n");

    /*
     * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
     * called yet, so malloc is not available).
     */

    for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
        if (vm_physmem[lcv].pgs)
            break;
    preload = (lcv == vm_nphysseg);

    /*
     * if VM is already running, attempt to malloc() vm_page structures
     */

#if defined(VM_PHYSSEG_NOADD)
        panic("uvm_page_physload: tried to add RAM after vm_mem_init");
        /* XXXCDC: need some sort of lockout for this case */
        npages = end - start;	/* # of pages */
        pgs = malloc(sizeof(struct vm_page) * npages,
            printf("uvm_page_physload: can not malloc vm_page "
                "structs for segment\n");
            printf("\tignoring 0x%lx -> 0x%lx\n", start, end);

        /* zero data, init phys_addr and free_list, and free pages */
        memset(pgs, 0, sizeof(struct vm_page) * npages);
        for (lcv = 0, paddr = ptoa(start) ;
            lcv < npages ; lcv++, paddr += PAGE_SIZE) {
            pgs[lcv].phys_addr = paddr;
            pgs[lcv].free_list = free_list;
            if (atop(paddr) >= avail_start &&
                atop(paddr) <= avail_end)
                uvm_pagefree(&pgs[lcv]);
        /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
        /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */

    /*
     * now insert us in the proper place in vm_physmem[]
     */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
    /* random: put it at the end (easy!) */
    ps = &vm_physmem[vm_nphysseg];
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)

    /* sort by address for binary search */
    for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
        if (start < vm_physmem[lcv].start)
            break;
    ps = &vm_physmem[lcv];
    /* move back other entries, if necessary ... */
    for (x = vm_nphysseg ; x > lcv ; x--)
        vm_physmem[x] = vm_physmem[x - 1];

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)

    /* sort by largest segment first */
    for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
        if ((end - start) >
            (vm_physmem[lcv].end - vm_physmem[lcv].start))
            break;
    ps = &vm_physmem[lcv];
    /* move back other entries, if necessary ... */
    for (x = vm_nphysseg ; x > lcv ; x--)
        vm_physmem[x] = vm_physmem[x - 1];

    panic("uvm_page_physload: unknown physseg strategy selected!");

    ps->avail_start = avail_start;
    ps->avail_end = avail_end;
    ps->lastpg = pgs + npages - 1;
    ps->free_list = free_list;
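
/*
 * Example (sketch, not compiled): registering two RAM segments with the
 * page pool.  As the comments above note, all arguments are page frame
 * numbers, and the avail range is the part that actually goes on the free
 * lists.  The function name and its parameters are hypothetical; both
 * segments use VM_FREELIST_DEFAULT here, but a port with several free
 * lists could steer a low-memory segment to a dedicated list instead.
 */
#if 0
void
example_register_ram(paddr_t lo_start, paddr_t lo_end,
    paddr_t hi_start, paddr_t hi_end)
{

	uvm_page_physload(lo_start, lo_end, lo_start, lo_end,
	    VM_FREELIST_DEFAULT);
	uvm_page_physload(hi_start, hi_end, hi_start, hi_end,
	    VM_FREELIST_DEFAULT);
}
#endif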
/*
 * uvm_page_recolor: Recolor the pages if the new bucket count is
 * larger than the old one.
 */

uvm_page_recolor(int newncolors)

    struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray;
    struct pgfreelist gpgfl, pgfl;
    int lcv, color, i, ocolors;
    struct uvm_cpu *ucpu;

    if (newncolors <= uvmexp.ncolors)
        return;

    if (uvm.page_init_done == false) {
        uvmexp.ncolors = newncolors;

    bucketcount = newncolors * VM_NFREELIST;
    bucketarray = malloc(bucketcount * sizeof(struct pgflbucket) * 2,
    cpuarray = bucketarray + bucketcount;
    if (bucketarray == NULL) {
        printf("WARNING: unable to allocate %ld page color buckets\n",

    mutex_spin_enter(&uvm_fpageqlock);

    /* Make sure we should still do this. */
    if (newncolors <= uvmexp.ncolors) {
        mutex_spin_exit(&uvm_fpageqlock);
        free(bucketarray, M_VMPAGE);

    oldbucketarray = uvm.page_free[0].pgfl_buckets;
    ocolors = uvmexp.ncolors;

    uvmexp.ncolors = newncolors;
    uvmexp.colormask = uvmexp.ncolors - 1;

    ucpu = curcpu()->ci_data.cpu_uvm;
    for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
        gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
        pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors));
        uvm_page_init_buckets(&gpgfl);
        uvm_page_init_buckets(&pgfl);
        for (color = 0; color < ocolors; color++) {
            for (i = 0; i < PGFL_NQUEUES; i++) {
                while ((pg = LIST_FIRST(&uvm.page_free[
                    lcv].pgfl_buckets[color].pgfl_queues[i]))
                    LIST_REMOVE(pg, pageq.list);	/* global */
                    LIST_REMOVE(pg, listq.list);	/* cpu */
                    LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[
                        VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
                    LIST_INSERT_HEAD(&pgfl.pgfl_buckets[
                        VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
        uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets;
        ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;

    if (have_recolored_pages) {
        mutex_spin_exit(&uvm_fpageqlock);
        free(oldbucketarray, M_VMPAGE);

    have_recolored_pages = true;
    mutex_spin_exit(&uvm_fpageqlock);
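
/*
 * Example (sketch, not compiled): how machine-dependent code might grow the
 * number of page colors once the outermost cache geometry is known during
 * autoconfiguration.  The helper name and the cache_size/cache_ways values
 * are placeholders; uvm_page_recolor() itself ignores requests that do not
 * increase the color count.
 */
#if 0
void
example_set_colors(size_t cache_size, size_t cache_ways)
{
	size_t way_size = cache_size / cache_ways;

	/* one color per page-sized slot in a cache way */
	uvm_page_recolor((int)(way_size >> PAGE_SHIFT));
}
#endif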
/*
 * uvm_cpu_attach: initialize per-CPU data structures.
 */

uvm_cpu_attach(struct cpu_info *ci)

    struct pgflbucket *bucketarray;
    struct pgfreelist pgfl;
    struct uvm_cpu *ucpu;

    if (CPU_IS_PRIMARY(ci)) {
        /* Already done in uvm_page_init(). */

    /* Add more reserve pages for this CPU. */
    uvmexp.reserve_kernel += vm_page_reserve_kernel;

    /* Configure this CPU's free lists. */
    bucketcount = uvmexp.ncolors * VM_NFREELIST;
    bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
    ucpu = &uvm.cpus[cpu_index(ci)];
    ci->ci_data.cpu_uvm = ucpu;
    for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
        pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors));
        uvm_page_init_buckets(&pgfl);
        ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
/*
 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
 */

static struct vm_page *
uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2,
    int *trycolorp)

    struct pgflist *freeq;
    int color, trycolor = *trycolorp;
    struct pgfreelist *gpgfl, *pgfl;

    KASSERT(mutex_owned(&uvm_fpageqlock));

    pgfl = &ucpu->page_free[flist];
    gpgfl = &uvm.page_free[flist];

        if ((pg = LIST_FIRST((freeq =
            &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
            VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
        if ((pg = LIST_FIRST((freeq =
            &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
            VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
        if ((pg = LIST_FIRST((freeq =
            &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
            VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
        if ((pg = LIST_FIRST((freeq =
            &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
            VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
        color = (color + 1) & uvmexp.colormask;
    } while (color != trycolor);

    LIST_REMOVE(pg, pageq.list);	/* global list */
    LIST_REMOVE(pg, listq.list);	/* per-cpu list */

    /* update zero'd page count */
    if (pg->flags & PG_ZERO)

    if (color == trycolor)
/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in obj's tree)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 * => policy decision: it is more important to pull a page off of the
 *	appropriate priority free list than it is to get a zero'd or
 *	unknown contents page.  This is because we live with the
 *	consequences of a bad free list decision for the entire
 *	lifetime of the page, e.g. if the page comes from memory that
 *	is slower to access.
 */

uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
    int flags, int strat, int free_list)

    int lcv, try1, try2, zeroit = 0, color;
    struct uvm_cpu *ucpu;

    KASSERT(obj == NULL || anon == NULL);
    KASSERT(anon == NULL || off == 0);
    KASSERT(off == trunc_page(off));
    KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock));
    KASSERT(anon == NULL || mutex_owned(&anon->an_lock));

    mutex_spin_enter(&uvm_fpageqlock);

    /*
     * This implements a global round-robin page coloring
     *
     * XXXJRT: What about virtually-indexed caches?
     */

    ucpu = curcpu()->ci_data.cpu_uvm;
    color = ucpu->page_free_nextcolor;

    /*
     * check to see if we need to generate some free pages waking
     */

    /*
     * fail if any of these conditions is true:
     * [1]  there really are no free pages, or
     * [2]  only kernel "reserved" pages remain and
     *        reserved pages have not been requested.
     * [3]  only pagedaemon "reserved" pages remain and
     *        the requestor isn't the pagedaemon.
     * we make kernel reserve pages available if called by a
     * kernel thread or a realtime thread.
     */
    if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
        flags |= UVM_PGA_USERESERVE;
    if ((uvmexp.free <= uvmexp.reserve_kernel &&
        (flags & UVM_PGA_USERESERVE) == 0) ||
        (uvmexp.free <= uvmexp.reserve_pagedaemon &&
        curlwp != uvm.pagedaemon_lwp))

#if PGFL_NQUEUES != 2
#error uvm_pagealloc_strat needs to be updated
#endif

    /*
     * If we want a zero'd page, try the ZEROS queue first, otherwise
     * we try the UNKNOWN queue first.
     */
    if (flags & UVM_PGA_ZERO) {
        try2 = PGFL_UNKNOWN;
        try1 = PGFL_UNKNOWN;

    case UVM_PGA_STRAT_NORMAL:
        /* Check freelists: descending priority (ascending id) order */
        for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
            pg = uvm_pagealloc_pgfl(ucpu, lcv,
                try1, try2, &color);

        /* No pages free! */

    case UVM_PGA_STRAT_ONLY:
    case UVM_PGA_STRAT_FALLBACK:
        /* Attempt to allocate from the specified free list. */
        KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
        pg = uvm_pagealloc_pgfl(ucpu, free_list,
            try1, try2, &color);

        /* Fall back, if possible. */
        if (strat == UVM_PGA_STRAT_FALLBACK) {
            strat = UVM_PGA_STRAT_NORMAL;

        /* No pages free! */

        panic("uvm_pagealloc_strat: bad strat %d", strat);

    /*
     * We now know which color we actually allocated from; set
     * the next color accordingly.
     */

    ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask;

    /*
     * update allocation statistics and remember if we have to
     */

    if (flags & UVM_PGA_ZERO) {
        if (pg->flags & PG_ZERO) {
            uvmexp.pga_zerohit++;
            uvmexp.pga_zeromiss++;
    if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
        ucpu->page_idle_zero = vm_page_zero_enable;

    KASSERT(pg->pqflags == PQ_FREE);

    pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
        pg->pqflags = PQ_ANON;
        atomic_inc_uint(&uvmexp.anonpages);

    mutex_spin_exit(&uvm_fpageqlock);

#if defined(UVM_PAGE_TRKOWN)
    pg->owner_tag = NULL;
#endif
    UVM_PAGE_OWN(pg, "new alloc");

    if (flags & UVM_PGA_ZERO) {
        /*
         * A zero'd page is not clean.  If we got a page not already
         * zero'd, then we have to zero it ourselves.
         */
        pg->flags &= ~PG_CLEAN;
            pmap_zero_page(VM_PAGE_TO_PHYS(pg));

    mutex_spin_exit(&uvm_fpageqlock);
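
/*
 * Example (sketch, not compiled): the common calling pattern for the
 * allocator above, via the uvm_pagealloc() wrapper (which selects
 * UVM_PGA_STRAT_NORMAL).  The object lock juggling around uvm_wait() is the
 * usual idiom when the caller can sleep; example_get_zeroed_page() and the
 * wait channel string are hypothetical names for illustration only.
 */
#if 0
static struct vm_page *
example_get_zeroed_page(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	for (;;) {
		mutex_enter(&uobj->vmobjlock);
		pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
		mutex_exit(&uobj->vmobjlock);
		if (pg != NULL)
			break;
		uvm_wait("exmplpg");	/* let the pagedaemon make progress */
	}
	/* the page comes back busy (PG_BUSY) and owned by the caller */
	return pg;
}
#endif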
/*
 * uvm_pagereplace: replace a page with another
 *
 * => object must be locked
 */

uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)

    struct uvm_object *uobj = oldpg->uobject;

    KASSERT((oldpg->flags & PG_TABLED) != 0);
    KASSERT(uobj != NULL);
    KASSERT((newpg->flags & PG_TABLED) == 0);
    KASSERT(newpg->uobject == NULL);
    KASSERT(mutex_owned(&uobj->vmobjlock));

    newpg->uobject = uobj;
    newpg->offset = oldpg->offset;

    uvm_pageremove_tree(uobj, oldpg);
    uvm_pageinsert_tree(uobj, newpg);
    uvm_pageinsert_list(uobj, newpg, oldpg);
    uvm_pageremove_list(uobj, oldpg);
/*
 * uvm_pagerealloc: reallocate a page from one object to another
 *
 * => both objects must be locked
 */

uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)

    /*
     * remove it from the old object
     */

    /*
     * put it in the new object
     */

    pg->uobject = newobj;
    pg->offset = newoff;
/*
 * check if page is zero-filled
 *
 *  - called with free page queue lock held.
 */

uvm_pagezerocheck(struct vm_page *pg)

    KASSERT(uvm_zerocheckkva != 0);
    KASSERT(mutex_owned(&uvm_fpageqlock));

    /*
     * XXX assuming pmap_kenter_pa and pmap_kremove never call
     * uvm page allocator.
     *
     * it might be better to have "CPU-local temporary map" pmap interface.
     */
    pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0);
    p = (int *)uvm_zerocheckkva;
    ep = (int *)((char *)p + PAGE_SIZE);
    pmap_update(pmap_kernel());
            panic("PG_ZERO page isn't zero-filled");
    pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
    /*
     * pmap_update() is not necessary here because no one except us
     */
/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
 */

uvm_pagefree(struct vm_page *pg)

    struct pgflist *pgfl;
    struct uvm_cpu *ucpu;
    int index, color, queue;

    if (pg->uobject == (void *)0xdeadbeef &&
        pg->uanon == (void *)0xdeadbeef) {
        panic("uvm_pagefree: freeing free page %p", pg);

    KASSERT((pg->flags & PG_PAGEOUT) == 0);
    KASSERT(!(pg->pqflags & PQ_FREE));
    KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg));
    KASSERT(pg->uobject == NULL || mutex_owned(&pg->uobject->vmobjlock));
    KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
        mutex_owned(&pg->uanon->an_lock));

    /*
     * if the page is loaned, resolve the loan instead of freeing.
     */

    if (pg->loan_count) {
        KASSERT(pg->wire_count == 0);

        /*
         * if the page is owned by an anon then we just want to
         * drop anon ownership.  the kernel will free the page when
         * it is done with it.  if the page is owned by an object,
         * remove it from the object and mark it dirty for the benefit
         * of possible anon owners.
         *
         * regardless of previous ownership, wakeup any waiters,
         * unbusy the page, and we're done.
         */

        if (pg->uobject != NULL) {
            pg->flags &= ~PG_CLEAN;
        } else if (pg->uanon != NULL) {
            if ((pg->pqflags & PQ_ANON) == 0) {
                pg->pqflags &= ~PQ_ANON;
                atomic_dec_uint(&uvmexp.anonpages);
            pg->uanon->an_page = NULL;
        if (pg->flags & PG_WANTED) {
        pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
#ifdef UVM_PAGE_TRKOWN
        pg->owner_tag = NULL;
#endif
        if (pg->loan_count) {
            KASSERT(pg->uobject == NULL);
            if (pg->uanon == NULL) {
                uvm_pagedequeue(pg);

    /*
     * remove page from its object or anon.
     */

    if (pg->uobject != NULL) {
    } else if (pg->uanon != NULL) {
        pg->uanon->an_page = NULL;
        atomic_dec_uint(&uvmexp.anonpages);

    /*
     * now remove the page from the queues.
     */

    uvm_pagedequeue(pg);

    /*
     * if the page was wired, unwire it now.
     */

    if (pg->wire_count) {

    /*
     * and put on free queue
     */

    iszero = (pg->flags & PG_ZERO);
    index = uvm_page_lookup_freelist(pg);
    color = VM_PGCOLOR_BUCKET(pg);
    queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN);

    pg->uobject = (void *)0xdeadbeef;
    pg->uanon = (void *)0xdeadbeef;

    mutex_spin_enter(&uvm_fpageqlock);
    pg->pqflags = PQ_FREE;

        uvm_pagezerocheck(pg);

    pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue];
    LIST_INSERT_HEAD(pgfl, pg, pageq.list);

    ucpu = curcpu()->ci_data.cpu_uvm;
    pg->offset = (uintptr_t)ucpu;
    pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue];
    LIST_INSERT_HEAD(pgfl, pg, listq.list);
    ucpu->pages[queue]++;
    if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
        ucpu->page_idle_zero = vm_page_zero_enable;

    mutex_spin_exit(&uvm_fpageqlock);
/*
 * uvm_page_unbusy: unbusy an array of pages.
 *
 * => pages must either all belong to the same object, or all belong to anons.
 * => if pages are object-owned, object must be locked.
 * => if pages are anon-owned, anons must be locked.
 * => caller must lock page queues if pages may be released.
 * => caller must make sure that anon-owned pages are not PG_RELEASED.
 */

uvm_page_unbusy(struct vm_page **pgs, int npgs)

    UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);

    for (i = 0; i < npgs; i++) {
        if (pg == NULL || pg == PGO_DONTCARE) {

        KASSERT(pg->uobject == NULL ||
            mutex_owned(&pg->uobject->vmobjlock));
        KASSERT(pg->uobject != NULL ||
            (pg->uanon != NULL && mutex_owned(&pg->uanon->an_lock)));

        KASSERT(pg->flags & PG_BUSY);
        KASSERT((pg->flags & PG_PAGEOUT) == 0);
        if (pg->flags & PG_WANTED) {
        if (pg->flags & PG_RELEASED) {
            UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
            KASSERT(pg->uobject != NULL ||
                (pg->uanon != NULL && pg->uanon->an_ref > 0));
            pg->flags &= ~PG_RELEASED;
        UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
        KASSERT((pg->flags & PG_FAKE) == 0);
        pg->flags &= ~(PG_WANTED|PG_BUSY);
        UVM_PAGE_OWN(pg, NULL);
#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.  it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */

uvm_page_own(struct vm_page *pg, const char *tag)

    struct uvm_object *uobj;
    struct vm_anon *anon;

    KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);

        KASSERT(mutex_owned(&uobj->vmobjlock));
    } else if (anon != NULL) {
        KASSERT(mutex_owned(&anon->an_lock));

    KASSERT((pg->flags & PG_WANTED) == 0);

    /* gain ownership? */
        KASSERT((pg->flags & PG_BUSY) != 0);
        if (pg->owner_tag) {
            printf("uvm_page_own: page %p already owned "
                "by proc %d [%s]\n", pg,
                pg->owner, pg->owner_tag);
            panic("uvm_page_own");
        pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
        pg->lowner = (curlwp) ? curlwp->l_lid : (lwpid_t) -1;
        pg->owner_tag = tag;

    /* drop ownership */
    KASSERT((pg->flags & PG_BUSY) == 0);
    if (pg->owner_tag == NULL) {
        printf("uvm_page_own: dropping ownership of an non-owned "
        panic("uvm_page_own");
    if (!uvmpdpol_pageisqueued_p(pg)) {
        KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
            pg->wire_count > 0);
        KASSERT(pg->wire_count == 0);
    pg->owner_tag = NULL;
/*
 * uvm_pageidlezero: zero free pages while the system is idle.
 *
 * => try to complete one color bucket at a time, to reduce our impact
 * => we loop until we either reach the target or there is a lwp ready
 *    to run, or MD code detects a reason to break early.
 */

uvm_pageidlezero(void)

    struct pgfreelist *pgfl, *gpgfl;
    struct uvm_cpu *ucpu;
    int free_list, firstbucket, nextbucket;

    ucpu = curcpu()->ci_data.cpu_uvm;
    if (!ucpu->page_idle_zero ||
        ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
        ucpu->page_idle_zero = false;

    mutex_enter(&uvm_fpageqlock);
    firstbucket = ucpu->page_free_nextcolor;
    nextbucket = firstbucket;
        for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
            if (sched_curcpu_runnable_p()) {
            pgfl = &ucpu->page_free[free_list];
            gpgfl = &uvm.page_free[free_list];
            while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[
                nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
                if (sched_curcpu_runnable_p()) {
                LIST_REMOVE(pg, pageq.list);	/* global list */
                LIST_REMOVE(pg, listq.list);	/* per-cpu list */
                ucpu->pages[PGFL_UNKNOWN]--;
                KASSERT(pg->pqflags == PQ_FREE);
                mutex_spin_exit(&uvm_fpageqlock);
#ifdef PMAP_PAGEIDLEZERO
                if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {

                    /*
                     * The machine-dependent code detected
                     * some reason for us to abort zeroing
                     * pages, probably because there is a
                     * process now ready to run.
                     */

                    mutex_spin_enter(&uvm_fpageqlock);
                    pg->pqflags = PQ_FREE;
                    LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
                        nextbucket].pgfl_queues[
                        PGFL_UNKNOWN], pg, pageq.list);
                    LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
                        nextbucket].pgfl_queues[
                        PGFL_UNKNOWN], pg, listq.list);
                    ucpu->pages[PGFL_UNKNOWN]++;
                    uvmexp.zeroaborts++;
                pmap_zero_page(VM_PAGE_TO_PHYS(pg));
#endif /* PMAP_PAGEIDLEZERO */
                pg->flags |= PG_ZERO;

                mutex_spin_enter(&uvm_fpageqlock);
                pg->pqflags = PQ_FREE;
                LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
                    nextbucket].pgfl_queues[PGFL_ZEROS],
                LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
                    nextbucket].pgfl_queues[PGFL_ZEROS],
                ucpu->pages[PGFL_ZEROS]++;
            if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {

        nextbucket = (nextbucket + 1) & uvmexp.colormask;
    } while (nextbucket != firstbucket);
    ucpu->page_idle_zero = false;

    mutex_spin_exit(&uvm_fpageqlock);
/*
 * uvm_pagelookup: look up a page
 *
 * => caller should lock object to keep someone from pulling the page
 */

uvm_pagelookup(struct uvm_object *obj, voff_t off)

    KASSERT(mutex_owned(&obj->vmobjlock));

    pg = (struct vm_page *)rb_tree_find_node(&obj->rb_tree, &off);

    KASSERT(pg == NULL || obj->uo_npages != 0);
    KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
        (pg->flags & PG_BUSY) != 0);
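
/*
 * Example (sketch, not compiled): looking up a page and honouring the
 * PG_BUSY convention that the assertions above encode.  The helper name
 * example_find_resident() is hypothetical; a real caller would typically
 * sleep on a busy page rather than give up.
 */
#if 0
static struct vm_page *
example_find_resident(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;

	KASSERT(mutex_owned(&uobj->vmobjlock));
	pg = uvm_pagelookup(uobj, off);
	if (pg != NULL && (pg->flags & PG_BUSY) != 0) {
		/* busy: another thread owns the page right now */
		pg = NULL;
	}
	return pg;
}
#endif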
/*
 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
 *
 * => caller must lock page queues
 */

uvm_pagewire(struct vm_page *pg)

    KASSERT(mutex_owned(&uvm_pageqlock));
#if defined(READAHEAD_STATS)
    if ((pg->pqflags & PQ_READAHEAD) != 0) {
        uvm_ra_hit.ev_count++;
        pg->pqflags &= ~PQ_READAHEAD;
#endif /* defined(READAHEAD_STATS) */
    if (pg->wire_count == 0) {
        uvm_pagedequeue(pg);
/*
 * uvm_pageunwire: unwire the page.
 *
 * => activate if wire count goes to zero.
 * => caller must lock page queues
 */

uvm_pageunwire(struct vm_page *pg)

    KASSERT(mutex_owned(&uvm_pageqlock));
    if (pg->wire_count == 0) {
        uvm_pageactivate(pg);
/*
 * uvm_pagedeactivate: deactivate page
 *
 * => caller must lock page queues
 * => caller must check to make sure page is not wired
 * => object that page belongs to must be locked (so we can adjust pg->flags)
 * => caller must clear the reference on the page before calling
 */

uvm_pagedeactivate(struct vm_page *pg)

    KASSERT(mutex_owned(&uvm_pageqlock));
    KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
    uvmpdpol_pagedeactivate(pg);
/*
 * uvm_pageactivate: activate page
 *
 * => caller must lock page queues
 */

uvm_pageactivate(struct vm_page *pg)

    KASSERT(mutex_owned(&uvm_pageqlock));
#if defined(READAHEAD_STATS)
    if ((pg->pqflags & PQ_READAHEAD) != 0) {
        uvm_ra_hit.ev_count++;
        pg->pqflags &= ~PQ_READAHEAD;
#endif /* defined(READAHEAD_STATS) */
    if (pg->wire_count != 0) {
    uvmpdpol_pageactivate(pg);
/*
 * uvm_pagedequeue: remove a page from any paging queue
 */

uvm_pagedequeue(struct vm_page *pg)

    if (uvmpdpol_pageisqueued_p(pg)) {
        KASSERT(mutex_owned(&uvm_pageqlock));
    uvmpdpol_pagedequeue(pg);

/*
 * uvm_pageenqueue: add a page to a paging queue without activating.
 * used where a page is not really demanded (yet).  eg. read-ahead
 */

uvm_pageenqueue(struct vm_page *pg)

    KASSERT(mutex_owned(&uvm_pageqlock));
    if (pg->wire_count != 0) {
    uvmpdpol_pageenqueue(pg);
/*
 * uvm_pagezero: zero fill a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

uvm_pagezero(struct vm_page *pg)

    pg->flags &= ~PG_CLEAN;
    pmap_zero_page(VM_PAGE_TO_PHYS(pg));

/*
 * uvm_pagecopy: copy a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

uvm_pagecopy(struct vm_page *src, struct vm_page *dst)

    dst->flags &= ~PG_CLEAN;
    pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
/*
 * uvm_pageismanaged: test to see that a page (specified by PA) is managed.
 */

uvm_pageismanaged(paddr_t pa)

    return (vm_physseg_find(atop(pa), NULL) != -1);

/*
 * uvm_page_lookup_freelist: look up the free list for the specified page
 */

uvm_page_lookup_freelist(struct vm_page *pg)

    lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
    return (vm_physmem[lcv].free_list);
#if defined(DDB) || defined(DEBUGPRINT)

/*
 * uvm_page_printit: actually print the page
 */

static const char page_flagbits[] = UVM_PGFLAGBITS;
static const char page_pqflagbits[] = UVM_PQFLAGBITS;

uvm_page_printit(struct vm_page *pg, bool full,
    void (*pr)(const char *, ...))

    struct vm_page *tpg;
    struct uvm_object *uobj;
    struct pgflist *pgl;

    (*pr)("PAGE %p:\n", pg);
    snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
    snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags);
    (*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n",
        pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg));
    (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
        pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
#if defined(UVM_PAGE_TRKOWN)
    if (pg->flags & PG_BUSY)
        (*pr)(" owning process = %d, tag=%s\n",
            pg->owner, pg->owner_tag);
        (*pr)(" page not busy, no owner\n");
    (*pr)(" [page ownership tracking disabled]\n");

    /* cross-verify object/anon */
    if ((pg->pqflags & PQ_FREE) == 0) {
        if (pg->pqflags & PQ_ANON) {
            if (pg->uanon == NULL || pg->uanon->an_page != pg)
                (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
                    (pg->uanon) ? pg->uanon->an_page : NULL);
                (*pr)(" anon backpointer is OK\n");

            (*pr)(" checking object list\n");
            TAILQ_FOREACH(tpg, &uobj->memq, listq.queue) {
                (*pr)(" page found on object list\n");
                (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");

    /* cross-verify page queue */
    if (pg->pqflags & PQ_FREE) {
        int fl = uvm_page_lookup_freelist(pg);
        int color = VM_PGCOLOR_BUCKET(pg);
        pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[
            ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN];

        (*pr)(" checking pageq list\n");
        LIST_FOREACH(tpg, pgl, pageq.list) {
            (*pr)(" page found on pageq list\n");
            (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");

/*
 * uvm_pages_printthem - print a summary of all managed pages
 */

uvm_page_printall(void (*pr)(const char *, ...))

    (*pr)("%18s %4s %4s %18s %18s"
#ifdef UVM_PAGE_TRKOWN
        "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
    for (i = 0; i < vm_nphysseg; i++) {
        for (pg = vm_physmem[i].pgs; pg <= vm_physmem[i].lastpg; pg++) {
            (*pr)("%18p %04x %04x %18p %18p",
                pg, pg->flags, pg->pqflags, pg->uobject,
#ifdef UVM_PAGE_TRKOWN
            if (pg->flags & PG_BUSY)
                (*pr)(" %d [%s]", pg->owner, pg->owner_tag);

#endif /* DDB || DEBUGPRINT */