Sync usage with man page.
[netbsd-mini2440.git] / sys / uvm / uvm_pdaemon.c
blobe89632c13b6c32e9d3094ec63ef070f9f5882005
1 /* $NetBSD: uvm_pdaemon.c,v 1.99 2009/08/18 02:43:49 yamt Exp $ */
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
7 * All rights reserved.
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Charles D. Cranor,
23 * Washington University, the University of California, Berkeley and
24 * its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
41 * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
42 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46 * All rights reserved.
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
58 * Carnegie Mellon requests users of this software to return to
60 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
70 * uvm_pdaemon.c: the page daemon
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.99 2009/08/18 02:43:49 yamt Exp $");
76 #include "opt_uvmhist.h"
77 #include "opt_readahead.h"
79 #include <sys/param.h>
80 #include <sys/proc.h>
81 #include <sys/systm.h>
82 #include <sys/kernel.h>
83 #include <sys/pool.h>
84 #include <sys/buf.h>
85 #include <sys/module.h>
86 #include <sys/atomic.h>
88 #include <uvm/uvm.h>
89 #include <uvm/uvm_pdpolicy.h>
92 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
93 * in a pass thru the inactive list when swap is full. the value should be
94 * "small"... if it's too large we'll cycle the active pages thru the inactive
95 * queue too quickly to for them to be referenced and avoid being freed.
98 #define UVMPD_NUMDIRTYREACTS 16
100 #define UVMPD_NUMTRYLOCKOWNER 16
103 * local prototypes
106 static void uvmpd_scan(void);
107 static void uvmpd_scan_queue(void);
108 static void uvmpd_tune(void);
110 unsigned int uvm_pagedaemon_waiters;
113 * XXX hack to avoid hangs when large processes fork.
115 u_int uvm_extrapages;
117 static kmutex_t uvm_reclaim_lock;
119 SLIST_HEAD(uvm_reclaim_hooks, uvm_reclaim_hook) uvm_reclaim_list;
122 * uvm_wait: wait (sleep) for the page daemon to free some pages
124 * => should be called with all locks released
125 * => should _not_ be called by the page daemon (to avoid deadlock)
128 void
129 uvm_wait(const char *wmsg)
131 int timo = 0;
133 mutex_spin_enter(&uvm_fpageqlock);
136 * check for page daemon going to sleep (waiting for itself)
139 if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
141 * now we have a problem: the pagedaemon wants to go to
142 * sleep until it frees more memory. but how can it
143 * free more memory if it is asleep? that is a deadlock.
144 * we have two options:
145 * [1] panic now
146 * [2] put a timeout on the sleep, thus causing the
147 * pagedaemon to only pause (rather than sleep forever)
149 * note that option [2] will only help us if we get lucky
150 * and some other process on the system breaks the deadlock
151 * by exiting or freeing memory (thus allowing the pagedaemon
152 * to continue). for now we panic if DEBUG is defined,
153 * otherwise we hope for the best with option [2] (better
154 * yet, this should never happen in the first place!).
157 printf("pagedaemon: deadlock detected!\n");
158 timo = hz >> 3; /* set timeout */
159 #if defined(DEBUG)
160 /* DEBUG: panic so we can debug it */
161 panic("pagedaemon deadlock");
162 #endif
165 uvm_pagedaemon_waiters++;
166 wakeup(&uvm.pagedaemon); /* wake the daemon! */
167 UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo);
171 * uvm_kick_pdaemon: perform checks to determine if we need to
172 * give the pagedaemon a nudge, and do so if necessary.
174 * => called with uvm_fpageqlock held.
177 void
178 uvm_kick_pdaemon(void)
181 KASSERT(mutex_owned(&uvm_fpageqlock));
183 if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
184 (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
185 uvmpdpol_needsscan_p())) {
186 wakeup(&uvm.pagedaemon);
191 * uvmpd_tune: tune paging parameters
193 * => called when ever memory is added (or removed?) to the system
194 * => caller must call with page queues locked
197 static void
198 uvmpd_tune(void)
200 int val;
202 UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
205 * try to keep 0.5% of available RAM free, but limit to between
206 * 128k and 1024k per-CPU. XXX: what are these values good for?
208 val = uvmexp.npages / 200;
209 val = MAX(val, (128*1024) >> PAGE_SHIFT);
210 val = MIN(val, (1024*1024) >> PAGE_SHIFT);
211 val *= ncpu;
213 /* Make sure there's always a user page free. */
214 if (val < uvmexp.reserve_kernel + 1)
215 val = uvmexp.reserve_kernel + 1;
216 uvmexp.freemin = val;
218 /* Calculate free target. */
219 val = (uvmexp.freemin * 4) / 3;
220 if (val <= uvmexp.freemin)
221 val = uvmexp.freemin + 1;
222 uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);
224 uvmexp.wiredmax = uvmexp.npages / 3;
225 UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
226 uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
230 * uvm_pageout: the main loop for the pagedaemon
233 void
234 uvm_pageout(void *arg)
236 int bufcnt, npages = 0;
237 int extrapages = 0;
238 struct pool *pp;
239 uint64_t where;
240 struct uvm_reclaim_hook *hook;
242 UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
244 UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
247 * ensure correct priority and set paging parameters...
250 uvm.pagedaemon_lwp = curlwp;
251 mutex_enter(&uvm_pageqlock);
252 npages = uvmexp.npages;
253 uvmpd_tune();
254 mutex_exit(&uvm_pageqlock);
257 * main loop
260 for (;;) {
261 bool needsscan, needsfree;
263 mutex_spin_enter(&uvm_fpageqlock);
264 if (uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) {
265 UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
266 UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
267 &uvm_fpageqlock, false, "pgdaemon", 0);
268 uvmexp.pdwoke++;
269 UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
270 } else {
271 mutex_spin_exit(&uvm_fpageqlock);
275 * now lock page queues and recompute inactive count
278 mutex_enter(&uvm_pageqlock);
279 if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
280 npages = uvmexp.npages;
281 extrapages = uvm_extrapages;
282 mutex_spin_enter(&uvm_fpageqlock);
283 uvmpd_tune();
284 mutex_spin_exit(&uvm_fpageqlock);
287 uvmpdpol_tune();
290 * Estimate a hint. Note that bufmem are returned to
291 * system only when entire pool page is empty.
293 mutex_spin_enter(&uvm_fpageqlock);
294 bufcnt = uvmexp.freetarg - uvmexp.free;
295 if (bufcnt < 0)
296 bufcnt = 0;
298 UVMHIST_LOG(pdhist," free/ftarg=%d/%d",
299 uvmexp.free, uvmexp.freetarg, 0,0);
301 needsfree = uvmexp.free + uvmexp.paging < uvmexp.freetarg;
302 needsscan = needsfree || uvmpdpol_needsscan_p();
305 * scan if needed
307 if (needsscan) {
308 mutex_spin_exit(&uvm_fpageqlock);
309 uvmpd_scan();
310 mutex_spin_enter(&uvm_fpageqlock);
314 * if there's any free memory to be had,
315 * wake up any waiters.
317 if (uvmexp.free > uvmexp.reserve_kernel ||
318 uvmexp.paging == 0) {
319 wakeup(&uvmexp.free);
320 uvm_pagedaemon_waiters = 0;
322 mutex_spin_exit(&uvm_fpageqlock);
325 * scan done. unlock page queues (the only lock we are holding)
327 mutex_exit(&uvm_pageqlock);
330 * if we don't need free memory, we're done.
333 if (!needsfree)
334 continue;
337 * start draining pool resources now that we're not
338 * holding any locks.
340 pool_drain_start(&pp, &where);
343 * kill unused metadata buffers.
345 mutex_enter(&bufcache_lock);
346 buf_drain(bufcnt << PAGE_SHIFT);
347 mutex_exit(&bufcache_lock);
349 mutex_enter(&uvm_reclaim_lock);
350 SLIST_FOREACH(hook, &uvm_reclaim_list, uvm_reclaim_next) {
351 (*hook->uvm_reclaim_hook)();
353 mutex_exit(&uvm_reclaim_lock);
356 * complete draining the pools.
358 pool_drain_end(pp, where);
360 /*NOTREACHED*/
365 * uvm_aiodone_worker: a workqueue callback for the aiodone daemon.
368 void
369 uvm_aiodone_worker(struct work *wk, void *dummy)
371 struct buf *bp = (void *)wk;
373 KASSERT(&bp->b_work == wk);
376 * process an i/o that's done.
379 (*bp->b_iodone)(bp);
382 void
383 uvm_pageout_start(int npages)
386 mutex_spin_enter(&uvm_fpageqlock);
387 uvmexp.paging += npages;
388 mutex_spin_exit(&uvm_fpageqlock);
391 void
392 uvm_pageout_done(int npages)
395 mutex_spin_enter(&uvm_fpageqlock);
396 KASSERT(uvmexp.paging >= npages);
397 uvmexp.paging -= npages;
400 * wake up either of pagedaemon or LWPs waiting for it.
403 if (uvmexp.free <= uvmexp.reserve_kernel) {
404 wakeup(&uvm.pagedaemon);
405 } else {
406 wakeup(&uvmexp.free);
407 uvm_pagedaemon_waiters = 0;
409 mutex_spin_exit(&uvm_fpageqlock);
413 * uvmpd_trylockowner: trylock the page's owner.
415 * => called with pageq locked.
416 * => resolve orphaned O->A loaned page.
417 * => return the locked mutex on success. otherwise, return NULL.
420 kmutex_t *
421 uvmpd_trylockowner(struct vm_page *pg)
423 struct uvm_object *uobj = pg->uobject;
424 kmutex_t *slock;
426 KASSERT(mutex_owned(&uvm_pageqlock));
428 if (uobj != NULL) {
429 slock = &uobj->vmobjlock;
430 } else {
431 struct vm_anon *anon = pg->uanon;
433 KASSERT(anon != NULL);
434 slock = &anon->an_lock;
437 if (!mutex_tryenter(slock)) {
438 return NULL;
441 if (uobj == NULL) {
444 * set PQ_ANON if it isn't set already.
447 if ((pg->pqflags & PQ_ANON) == 0) {
448 KASSERT(pg->loan_count > 0);
449 pg->loan_count--;
450 pg->pqflags |= PQ_ANON;
451 /* anon now owns it */
455 return slock;
458 #if defined(VMSWAP)
459 struct swapcluster {
460 int swc_slot;
461 int swc_nallocated;
462 int swc_nused;
463 struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
466 static void
467 swapcluster_init(struct swapcluster *swc)
470 swc->swc_slot = 0;
471 swc->swc_nused = 0;
474 static int
475 swapcluster_allocslots(struct swapcluster *swc)
477 int slot;
478 int npages;
480 if (swc->swc_slot != 0) {
481 return 0;
484 /* Even with strange MAXPHYS, the shift
485 implicitly rounds down to a page. */
486 npages = MAXPHYS >> PAGE_SHIFT;
487 slot = uvm_swap_alloc(&npages, true);
488 if (slot == 0) {
489 return ENOMEM;
491 swc->swc_slot = slot;
492 swc->swc_nallocated = npages;
493 swc->swc_nused = 0;
495 return 0;
498 static int
499 swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
501 int slot;
502 struct uvm_object *uobj;
504 KASSERT(swc->swc_slot != 0);
505 KASSERT(swc->swc_nused < swc->swc_nallocated);
506 KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0);
508 slot = swc->swc_slot + swc->swc_nused;
509 uobj = pg->uobject;
510 if (uobj == NULL) {
511 KASSERT(mutex_owned(&pg->uanon->an_lock));
512 pg->uanon->an_swslot = slot;
513 } else {
514 int result;
516 KASSERT(mutex_owned(&uobj->vmobjlock));
517 result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
518 if (result == -1) {
519 return ENOMEM;
522 swc->swc_pages[swc->swc_nused] = pg;
523 swc->swc_nused++;
525 return 0;
528 static void
529 swapcluster_flush(struct swapcluster *swc, bool now)
531 int slot;
532 int nused;
533 int nallocated;
534 int error;
536 if (swc->swc_slot == 0) {
537 return;
539 KASSERT(swc->swc_nused <= swc->swc_nallocated);
541 slot = swc->swc_slot;
542 nused = swc->swc_nused;
543 nallocated = swc->swc_nallocated;
546 * if this is the final pageout we could have a few
547 * unused swap blocks. if so, free them now.
550 if (nused < nallocated) {
551 if (!now) {
552 return;
554 uvm_swap_free(slot + nused, nallocated - nused);
558 * now start the pageout.
561 if (nused > 0) {
562 uvmexp.pdpageouts++;
563 uvm_pageout_start(nused);
564 error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
565 KASSERT(error == 0 || error == ENOMEM);
569 * zero swslot to indicate that we are
570 * no longer building a swap-backed cluster.
573 swc->swc_slot = 0;
574 swc->swc_nused = 0;
577 static int
578 swapcluster_nused(struct swapcluster *swc)
581 return swc->swc_nused;
585 * uvmpd_dropswap: free any swap allocated to this page.
587 * => called with owner locked.
588 * => return true if a page had an associated slot.
591 static bool
592 uvmpd_dropswap(struct vm_page *pg)
594 bool result = false;
595 struct vm_anon *anon = pg->uanon;
597 if ((pg->pqflags & PQ_ANON) && anon->an_swslot) {
598 uvm_swap_free(anon->an_swslot, 1);
599 anon->an_swslot = 0;
600 pg->flags &= ~PG_CLEAN;
601 result = true;
602 } else if (pg->pqflags & PQ_AOBJ) {
603 int slot = uao_set_swslot(pg->uobject,
604 pg->offset >> PAGE_SHIFT, 0);
605 if (slot) {
606 uvm_swap_free(slot, 1);
607 pg->flags &= ~PG_CLEAN;
608 result = true;
612 return result;
616 * uvmpd_trydropswap: try to free any swap allocated to this page.
618 * => return true if a slot is successfully freed.
621 bool
622 uvmpd_trydropswap(struct vm_page *pg)
624 kmutex_t *slock;
625 bool result;
627 if ((pg->flags & PG_BUSY) != 0) {
628 return false;
632 * lock the page's owner.
635 slock = uvmpd_trylockowner(pg);
636 if (slock == NULL) {
637 return false;
641 * skip this page if it's busy.
644 if ((pg->flags & PG_BUSY) != 0) {
645 mutex_exit(slock);
646 return false;
649 result = uvmpd_dropswap(pg);
651 mutex_exit(slock);
653 return result;
656 #endif /* defined(VMSWAP) */
659 * uvmpd_scan_queue: scan an replace candidate list for pages
660 * to clean or free.
662 * => called with page queues locked
663 * => we work on meeting our free target by converting inactive pages
664 * into free pages.
665 * => we handle the building of swap-backed clusters
668 static void
669 uvmpd_scan_queue(void)
671 struct vm_page *p;
672 struct uvm_object *uobj;
673 struct vm_anon *anon;
674 #if defined(VMSWAP)
675 struct swapcluster swc;
676 #endif /* defined(VMSWAP) */
677 int dirtyreacts;
678 int lockownerfail;
679 kmutex_t *slock;
680 UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);
683 * swslot is non-zero if we are building a swap cluster. we want
684 * to stay in the loop while we have a page to scan or we have
685 * a swap-cluster to build.
688 #if defined(VMSWAP)
689 swapcluster_init(&swc);
690 #endif /* defined(VMSWAP) */
692 dirtyreacts = 0;
693 lockownerfail = 0;
694 uvmpdpol_scaninit();
696 while (/* CONSTCOND */ 1) {
699 * see if we've met the free target.
702 if (uvmexp.free + uvmexp.paging
703 #if defined(VMSWAP)
704 + swapcluster_nused(&swc)
705 #endif /* defined(VMSWAP) */
706 >= uvmexp.freetarg << 2 ||
707 dirtyreacts == UVMPD_NUMDIRTYREACTS) {
708 UVMHIST_LOG(pdhist," met free target: "
709 "exit loop", 0, 0, 0, 0);
710 break;
713 p = uvmpdpol_selectvictim();
714 if (p == NULL) {
715 break;
717 KASSERT(uvmpdpol_pageisqueued_p(p));
718 KASSERT(p->wire_count == 0);
721 * we are below target and have a new page to consider.
724 anon = p->uanon;
725 uobj = p->uobject;
728 * first we attempt to lock the object that this page
729 * belongs to. if our attempt fails we skip on to
730 * the next page (no harm done). it is important to
731 * "try" locking the object as we are locking in the
732 * wrong order (pageq -> object) and we don't want to
733 * deadlock.
735 * the only time we expect to see an ownerless page
736 * (i.e. a page with no uobject and !PQ_ANON) is if an
737 * anon has loaned a page from a uvm_object and the
738 * uvm_object has dropped the ownership. in that
739 * case, the anon can "take over" the loaned page
740 * and make it its own.
743 slock = uvmpd_trylockowner(p);
744 if (slock == NULL) {
746 * yield cpu to make a chance for an LWP holding
747 * the lock run. otherwise we can busy-loop too long
748 * if the page queue is filled with a lot of pages
749 * from few objects.
751 lockownerfail++;
752 if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) {
753 mutex_exit(&uvm_pageqlock);
754 /* XXX Better than yielding but inadequate. */
755 kpause("livelock", false, 1, NULL);
756 mutex_enter(&uvm_pageqlock);
757 lockownerfail = 0;
759 continue;
761 if (p->flags & PG_BUSY) {
762 mutex_exit(slock);
763 uvmexp.pdbusy++;
764 continue;
767 /* does the page belong to an object? */
768 if (uobj != NULL) {
769 uvmexp.pdobscan++;
770 } else {
771 #if defined(VMSWAP)
772 KASSERT(anon != NULL);
773 uvmexp.pdanscan++;
774 #else /* defined(VMSWAP) */
775 panic("%s: anon", __func__);
776 #endif /* defined(VMSWAP) */
781 * we now have the object and the page queues locked.
782 * if the page is not swap-backed, call the object's
783 * pager to flush and free the page.
786 #if defined(READAHEAD_STATS)
787 if ((p->pqflags & PQ_READAHEAD) != 0) {
788 p->pqflags &= ~PQ_READAHEAD;
789 uvm_ra_miss.ev_count++;
791 #endif /* defined(READAHEAD_STATS) */
793 if ((p->pqflags & PQ_SWAPBACKED) == 0) {
794 KASSERT(uobj != NULL);
795 mutex_exit(&uvm_pageqlock);
796 (void) (uobj->pgops->pgo_put)(uobj, p->offset,
797 p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
798 mutex_enter(&uvm_pageqlock);
799 continue;
803 * the page is swap-backed. remove all the permissions
804 * from the page so we can sync the modified info
805 * without any race conditions. if the page is clean
806 * we can free it now and continue.
809 pmap_page_protect(p, VM_PROT_NONE);
810 if ((p->flags & PG_CLEAN) && pmap_clear_modify(p)) {
811 p->flags &= ~(PG_CLEAN);
813 if (p->flags & PG_CLEAN) {
814 int slot;
815 int pageidx;
817 pageidx = p->offset >> PAGE_SHIFT;
818 uvm_pagefree(p);
819 uvmexp.pdfreed++;
822 * for anons, we need to remove the page
823 * from the anon ourselves. for aobjs,
824 * pagefree did that for us.
827 if (anon) {
828 KASSERT(anon->an_swslot != 0);
829 anon->an_page = NULL;
830 slot = anon->an_swslot;
831 } else {
832 slot = uao_find_swslot(uobj, pageidx);
834 mutex_exit(slock);
836 if (slot > 0) {
837 /* this page is now only in swap. */
838 mutex_enter(&uvm_swap_data_lock);
839 KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
840 uvmexp.swpgonly++;
841 mutex_exit(&uvm_swap_data_lock);
843 continue;
846 #if defined(VMSWAP)
848 * this page is dirty, skip it if we'll have met our
849 * free target when all the current pageouts complete.
852 if (uvmexp.free + uvmexp.paging > uvmexp.freetarg << 2) {
853 mutex_exit(slock);
854 continue;
858 * free any swap space allocated to the page since
859 * we'll have to write it again with its new data.
862 uvmpd_dropswap(p);
865 * start new swap pageout cluster (if necessary).
867 * if swap is full reactivate this page so that
868 * we eventually cycle all pages through the
869 * inactive queue.
872 if (swapcluster_allocslots(&swc)) {
873 dirtyreacts++;
874 uvm_pageactivate(p);
875 mutex_exit(slock);
876 continue;
880 * at this point, we're definitely going reuse this
881 * page. mark the page busy and delayed-free.
882 * we should remove the page from the page queues
883 * so we don't ever look at it again.
884 * adjust counters and such.
887 p->flags |= PG_BUSY;
888 UVM_PAGE_OWN(p, "scan_queue");
890 p->flags |= PG_PAGEOUT;
891 uvm_pagedequeue(p);
893 uvmexp.pgswapout++;
894 mutex_exit(&uvm_pageqlock);
897 * add the new page to the cluster.
900 if (swapcluster_add(&swc, p)) {
901 p->flags &= ~(PG_BUSY|PG_PAGEOUT);
902 UVM_PAGE_OWN(p, NULL);
903 mutex_enter(&uvm_pageqlock);
904 dirtyreacts++;
905 uvm_pageactivate(p);
906 mutex_exit(slock);
907 continue;
909 mutex_exit(slock);
911 swapcluster_flush(&swc, false);
912 mutex_enter(&uvm_pageqlock);
915 * the pageout is in progress. bump counters and set up
916 * for the next loop.
919 uvmexp.pdpending++;
921 #else /* defined(VMSWAP) */
922 uvm_pageactivate(p);
923 mutex_exit(slock);
924 #endif /* defined(VMSWAP) */
927 #if defined(VMSWAP)
928 mutex_exit(&uvm_pageqlock);
929 swapcluster_flush(&swc, true);
930 mutex_enter(&uvm_pageqlock);
931 #endif /* defined(VMSWAP) */
935 * uvmpd_scan: scan the page queues and attempt to meet our targets.
937 * => called with pageq's locked
940 static void
941 uvmpd_scan(void)
943 int swap_shortage, pages_freed;
944 UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
946 uvmexp.pdrevs++;
949 * work on meeting our targets. first we work on our free target
950 * by converting inactive pages into free pages. then we work on
951 * meeting our inactive target by converting active pages to
952 * inactive ones.
955 UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);
957 pages_freed = uvmexp.pdfreed;
958 uvmpd_scan_queue();
959 pages_freed = uvmexp.pdfreed - pages_freed;
962 * detect if we're not going to be able to page anything out
963 * until we free some swap resources from active pages.
966 swap_shortage = 0;
967 if (uvmexp.free < uvmexp.freetarg &&
968 uvmexp.swpginuse >= uvmexp.swpgavail &&
969 !uvm_swapisfull() &&
970 pages_freed == 0) {
971 swap_shortage = uvmexp.freetarg - uvmexp.free;
974 uvmpdpol_balancequeue(swap_shortage);
977 * if still below the minimum target, try unloading kernel
978 * modules.
981 if (uvmexp.free < uvmexp.freemin) {
982 module_thread_kick();
987 * uvm_reclaimable: decide whether to wait for pagedaemon.
989 * => return true if it seems to be worth to do uvm_wait.
991 * XXX should be tunable.
992 * XXX should consider pools, etc?
995 bool
996 uvm_reclaimable(void)
998 int filepages;
999 int active, inactive;
1002 * if swap is not full, no problem.
1005 if (!uvm_swapisfull()) {
1006 return true;
1010 * file-backed pages can be reclaimed even when swap is full.
1011 * if we have more than 1/16 of pageable memory or 5MB, try to reclaim.
1013 * XXX assume the worst case, ie. all wired pages are file-backed.
1015 * XXX should consider about other reclaimable memory.
1016 * XXX ie. pools, traditional buffer cache.
1019 filepages = uvmexp.filepages + uvmexp.execpages - uvmexp.wired;
1020 uvm_estimatepageable(&active, &inactive);
1021 if (filepages >= MIN((active + inactive) >> 4,
1022 5 * 1024 * 1024 >> PAGE_SHIFT)) {
1023 return true;
1027 * kill the process, fail allocation, etc..
1030 return false;
/*
 * uvm_estimatepageable: forward to the pagedaemon policy's
 * uvmpdpol_estimatepageable(), which fills in *active and *inactive.
 */

void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}
1040 void
1041 uvm_reclaim_init(void)
1044 /* Initialize UVM reclaim hooks. */
1045 mutex_init(&uvm_reclaim_lock, MUTEX_DEFAULT, IPL_NONE);
1046 SLIST_INIT(&uvm_reclaim_list);
1049 void
1050 uvm_reclaim_hook_add(struct uvm_reclaim_hook *hook)
1053 KASSERT(hook != NULL);
1055 mutex_enter(&uvm_reclaim_lock);
1056 SLIST_INSERT_HEAD(&uvm_reclaim_list, hook, uvm_reclaim_next);
1057 mutex_exit(&uvm_reclaim_lock);
1060 void
1061 uvm_reclaim_hook_del(struct uvm_reclaim_hook *hook_entry)
1063 struct uvm_reclaim_hook *hook;
1065 KASSERT(hook_entry != NULL);
1067 mutex_enter(&uvm_reclaim_lock);
1068 SLIST_FOREACH(hook, &uvm_reclaim_list, uvm_reclaim_next) {
1069 if (hook != hook_entry) {
1070 continue;
1073 SLIST_REMOVE(&uvm_reclaim_list, hook, uvm_reclaim_hook,
1074 uvm_reclaim_next);
1075 break;
1078 mutex_exit(&uvm_reclaim_lock);