Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sys / uvm / uvm_pdpolicy_clockpro.c
blobaa5b95c28b6f703b032d7e6bc19599cb2c673ce2
1 /* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.14 2008/03/22 05:50:42 bjs Exp $ */
3 /*-
4 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
30 * CLOCK-Pro replacement policy:
31 * http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
33 * approximation of the list of non-resident pages using hash:
34 * http://linux-mm.org/ClockProApproximation
37 /* #define CLOCKPRO_DEBUG */
39 #if defined(PDSIM)
41 #include "pdsim.h"
43 #else /* defined(PDSIM) */
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.14 2008/03/22 05:50:42 bjs Exp $");
48 #include "opt_ddb.h"
50 #include <sys/param.h>
51 #include <sys/proc.h>
52 #include <sys/systm.h>
53 #include <sys/kernel.h>
54 #include <sys/hash.h>
56 #include <uvm/uvm.h>
57 #include <uvm/uvm_pdpolicy.h>
58 #include <uvm/uvm_pdpolicy_impl.h>
60 #if ((__STDC_VERSION__ - 0) >= 199901L)
61 #define DPRINTF(...) /* nothing */
62 #define WARN(...) printf(__VA_ARGS__)
63 #else /* ((__STDC_VERSION__ - 0) >= 199901L) */
64 #define DPRINTF(a...) /* nothing */ /* GCC */
65 #define WARN(a...) printf(a)
66 #endif /* ((__STDC_VERSION__ - 0) >= 199901L) */
68 #define dump(a) /* nothing */
70 #undef USEONCE2
71 #define LISTQ
72 #undef ADAPTIVE
74 #endif /* defined(PDSIM) */
/*
 * Tunables.  CLOCKPRO_COLDPCT and CLOCKPRO_HASHFACTOR can be overridden
 * at build time; the remaining values are fixed.
 */

/* initial value handed to the cold target percentage parameter */
#ifndef CLOCKPRO_COLDPCT
#define	CLOCKPRO_COLDPCT	10
#endif /* !CLOCKPRO_COLDPCT */

/* upper bound (percent of queued pages) for the ADAPTIVE cold target */
#define	CLOCKPRO_COLDPCTMAX	90

/* multiplier applied to the number of non-resident hash buckets */
#ifndef CLOCKPRO_HASHFACTOR
#define	CLOCKPRO_HASHFACTOR	2
#endif /* !CLOCKPRO_HASHFACTOR */

/* lower bound for the new queue length limit: 1MB worth of pages */
#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

/* run-time copy of the hash factor, read by clockpro_hashinit() */
int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;
90 PDPOL_EVCNT_DEFINE(nresrecordobj)
91 PDPOL_EVCNT_DEFINE(nresrecordanon)
92 PDPOL_EVCNT_DEFINE(nreslookupobj)
93 PDPOL_EVCNT_DEFINE(nreslookupanon)
94 PDPOL_EVCNT_DEFINE(nresfoundobj)
95 PDPOL_EVCNT_DEFINE(nresfoundanon)
96 PDPOL_EVCNT_DEFINE(nresanonfree)
97 PDPOL_EVCNT_DEFINE(nresconflict)
98 PDPOL_EVCNT_DEFINE(nresoverwritten)
99 PDPOL_EVCNT_DEFINE(nreshandhot)
101 PDPOL_EVCNT_DEFINE(hhottakeover)
102 PDPOL_EVCNT_DEFINE(hhotref)
103 PDPOL_EVCNT_DEFINE(hhotunref)
104 PDPOL_EVCNT_DEFINE(hhotcold)
105 PDPOL_EVCNT_DEFINE(hhotcoldtest)
107 PDPOL_EVCNT_DEFINE(hcoldtakeover)
108 PDPOL_EVCNT_DEFINE(hcoldref)
109 PDPOL_EVCNT_DEFINE(hcoldunref)
110 PDPOL_EVCNT_DEFINE(hcoldreftest)
111 PDPOL_EVCNT_DEFINE(hcoldunreftest)
112 PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
113 PDPOL_EVCNT_DEFINE(hcoldhot)
115 PDPOL_EVCNT_DEFINE(speculativeenqueue)
116 PDPOL_EVCNT_DEFINE(speculativehit1)
117 PDPOL_EVCNT_DEFINE(speculativehit2)
118 PDPOL_EVCNT_DEFINE(speculativemiss)
/*
 * Policy-private page flags kept in pg->pqflags.
 */
#define	PQ_REFERENCED	PQ_PRIVATE1
#define	PQ_HOT		PQ_PRIVATE2
#define	PQ_TEST		PQ_PRIVATE3
#define	PQ_INITIALREF	PQ_PRIVATE4

/*
 * The queue index is stored in the three bits PQ_PRIVATE5..7, so those
 * bits have to be consecutive powers of two.
 */
#if !(PQ_PRIVATE6 == PQ_PRIVATE5 * 2 && PQ_PRIVATE7 == PQ_PRIVATE6 * 2)
#error PQ_PRIVATE
#endif
#define	PQ_QMASK	(PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7)
#define	PQ_QFACTOR	PQ_PRIVATE5
#define	PQ_SPECULATIVE	PQ_PRIVATE8

/*
 * Queue indices.  CLOCKPRO_NOQUEUE means "not on any queue"; the others
 * select s_q[index - 1].
 */
#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
#if defined(LISTQ)
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* defined(LISTQ) */
/* without LISTQ, the cold and hot queues swap roles via coldqidx */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* defined(LISTQ) */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4
143 static inline void
144 clockpro_setq(struct vm_page *pg, int qidx)
146 KASSERT(qidx >= CLOCKPRO_NOQUEUE);
147 KASSERT(qidx <= CLOCKPRO_NQUEUE);
149 pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
152 static inline int
153 clockpro_getq(struct vm_page *pg)
155 int qidx;
157 qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
158 KASSERT(qidx >= CLOCKPRO_NOQUEUE);
159 KASSERT(qidx <= CLOCKPRO_NQUEUE);
160 return qidx;
163 typedef struct {
164 struct pglist q_q;
165 int q_len;
166 } pageq_t;
168 struct clockpro_state {
169 int s_npages;
170 int s_coldtarget;
171 int s_ncold;
173 int s_newqlenmax;
174 pageq_t s_q[CLOCKPRO_NQUEUE];
176 struct uvm_pctparam s_coldtargetpct;
179 static pageq_t *
180 clockpro_queue(struct clockpro_state *s, int qidx)
183 KASSERT(CLOCKPRO_NOQUEUE < qidx);
184 KASSERT(qidx <= CLOCKPRO_NQUEUE);
186 return &s->s_q[qidx - 1];
#ifndef LISTQ

/* selects which of the two physical queues is currently the cold queue */
static int coldqidx;

/*
 * clockpro_switchqueue: exchange the roles of the hot and cold queues
 * by toggling coldqidx between 0 and 1.
 */
static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */
202 static struct clockpro_state clockpro;
203 static struct clockpro_scanstate {
204 int ss_nscanned;
205 } scanstate;
207 /* ---------------------------------------- */
209 static void
210 pageq_init(pageq_t *q)
213 TAILQ_INIT(&q->q_q);
214 q->q_len = 0;
217 static int
218 pageq_len(const pageq_t *q)
221 return q->q_len;
224 static struct vm_page *
225 pageq_first(const pageq_t *q)
228 return TAILQ_FIRST(&q->q_q);
231 static void
232 pageq_insert_tail(pageq_t *q, struct vm_page *pg)
235 TAILQ_INSERT_TAIL(&q->q_q, pg, pageq.queue);
236 q->q_len++;
#if defined(LISTQ)
/*
 * pageq_insert_head: put "pg" at the front of "q" and account for it.
 */
static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	q->q_len += 1;
	TAILQ_INSERT_HEAD(&q->q_q, pg, pageq.queue);
}

#endif
249 static void
250 pageq_remove(pageq_t *q, struct vm_page *pg)
253 #if 1
254 KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
255 #endif
256 KASSERT(q->q_len > 0);
257 TAILQ_REMOVE(&q->q_q, pg, pageq.queue);
258 q->q_len--;
261 static struct vm_page *
262 pageq_remove_head(pageq_t *q)
264 struct vm_page *pg;
266 pg = TAILQ_FIRST(&q->q_q);
267 if (pg == NULL) {
268 KASSERT(q->q_len == 0);
269 return NULL;
271 pageq_remove(q, pg);
272 return pg;
275 /* ---------------------------------------- */
277 static void
278 clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
280 pageq_t *q = clockpro_queue(s, qidx);
282 clockpro_setq(pg, qidx);
283 pageq_insert_tail(q, pg);
#if defined(LISTQ)
/*
 * clockpro_insert_head: tag "pg" with queue index "qidx" and put it at
 * the front of the corresponding queue.
 */
static void
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t * const dest = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(dest, pg);
}

#endif
297 /* ---------------------------------------- */
/* identity of a page as seen by the non-resident page tracking */
typedef uintptr_t objid_t;

/* hashed page identity stored in a bucket slot */
typedef uint32_t nonres_cookie_t;

/* slot value meaning "empty"; calccookie() never returns it */
#define	NONRES_COOKIE_INVAL	0
305 * XXX maybe these hash functions need reconsideration,
306 * given that hash distribution is critical here.
309 static uint32_t
310 pageidentityhash1(objid_t obj, off_t idx)
312 uint32_t hash = HASH32_BUF_INIT;
314 #if 1
315 hash = hash32_buf(&idx, sizeof(idx), hash);
316 hash = hash32_buf(&obj, sizeof(obj), hash);
317 #else
318 hash = hash32_buf(&obj, sizeof(obj), hash);
319 hash = hash32_buf(&idx, sizeof(idx), hash);
320 #endif
321 return hash;
324 static uint32_t
325 pageidentityhash2(objid_t obj, off_t idx)
327 uint32_t hash = HASH32_BUF_INIT;
329 hash = hash32_buf(&obj, sizeof(obj), hash);
330 hash = hash32_buf(&idx, sizeof(idx), hash);
331 return hash;
334 static nonres_cookie_t
335 calccookie(objid_t obj, off_t idx)
337 uint32_t hash = pageidentityhash2(obj, idx);
338 nonres_cookie_t cookie = hash;
340 if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
341 cookie++; /* XXX */
343 return cookie;
346 #define BUCKETSIZE 14
347 struct bucket {
348 int cycle;
349 int cur;
350 nonres_cookie_t pages[BUCKETSIZE];
352 static int cycle_target;
353 static int cycle_target_frac;
355 static struct bucket static_bucket;
356 static struct bucket *buckets = &static_bucket;
357 static size_t hashsize = 1;
359 static int coldadj;
360 #define COLDTARGET_ADJ(d) coldadj += (d)
362 #if defined(PDSIM)
364 static void *
365 clockpro_hashalloc(int n)
367 size_t allocsz = sizeof(*buckets) * n;
369 return malloc(allocsz);
/*
 * clockpro_hashfree: release a table obtained from clockpro_hashalloc()
 * (simulator version).  The bucket count "n" is not needed here.
 */
static void
clockpro_hashfree(void *p, int n)
{

	(void)n;
	free(p);
}
379 #else /* defined(PDSIM) */
381 static void *
382 clockpro_hashalloc(int n)
384 size_t allocsz = round_page(sizeof(*buckets) * n);
386 return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
389 static void
390 clockpro_hashfree(void *p, int n)
392 size_t allocsz = round_page(sizeof(*buckets) * n);
394 uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
397 #endif /* defined(PDSIM) */
399 static void
400 clockpro_hashinit(uint64_t n)
402 struct bucket *newbuckets;
403 struct bucket *oldbuckets;
404 size_t sz;
405 size_t oldsz;
406 int i;
408 sz = howmany(n, BUCKETSIZE);
409 sz *= clockpro_hashfactor;
410 newbuckets = clockpro_hashalloc(sz);
411 if (newbuckets == NULL) {
412 panic("%s: allocation failure", __func__);
414 for (i = 0; i < sz; i++) {
415 struct bucket *b = &newbuckets[i];
416 int j;
418 b->cycle = cycle_target;
419 b->cur = 0;
420 for (j = 0; j < BUCKETSIZE; j++) {
421 b->pages[j] = NONRES_COOKIE_INVAL;
424 /* XXX lock */
425 oldbuckets = buckets;
426 oldsz = hashsize;
427 buckets = newbuckets;
428 hashsize = sz;
429 /* XXX unlock */
430 if (oldbuckets != &static_bucket) {
431 clockpro_hashfree(oldbuckets, oldsz);
435 static struct bucket *
436 nonresident_getbucket(objid_t obj, off_t idx)
438 uint32_t hash;
440 hash = pageidentityhash1(obj, idx);
441 return &buckets[hash % hashsize];
444 static void
445 nonresident_rotate(struct bucket *b)
447 const int target = cycle_target;
448 const int cycle = b->cycle;
449 int cur;
450 int todo;
452 todo = target - cycle;
453 if (todo >= BUCKETSIZE * 2) {
454 todo = (todo % BUCKETSIZE) + BUCKETSIZE;
456 cur = b->cur;
457 while (todo > 0) {
458 if (b->pages[cur] != NONRES_COOKIE_INVAL) {
459 PDPOL_EVCNT_INCR(nreshandhot);
460 COLDTARGET_ADJ(-1);
462 b->pages[cur] = NONRES_COOKIE_INVAL;
463 cur++;
464 if (cur == BUCKETSIZE) {
465 cur = 0;
467 todo--;
469 b->cycle = target;
470 b->cur = cur;
473 static bool
474 nonresident_lookupremove(objid_t obj, off_t idx)
476 struct bucket *b = nonresident_getbucket(obj, idx);
477 nonres_cookie_t cookie = calccookie(obj, idx);
478 int i;
480 nonresident_rotate(b);
481 for (i = 0; i < BUCKETSIZE; i++) {
482 if (b->pages[i] == cookie) {
483 b->pages[i] = NONRES_COOKIE_INVAL;
484 return true;
487 return false;
490 static objid_t
491 pageobj(struct vm_page *pg)
493 const void *obj;
496 * XXX object pointer is often freed and reused for unrelated object.
497 * for vnodes, it would be better to use something like
498 * a hash of fsid/fileid/generation.
501 obj = pg->uobject;
502 if (obj == NULL) {
503 obj = pg->uanon;
504 KASSERT(obj != NULL);
505 KASSERT(pg->offset == 0);
508 return (objid_t)obj;
511 static off_t
512 pageidx(struct vm_page *pg)
515 KASSERT((pg->offset & PAGE_MASK) == 0);
516 return pg->offset >> PAGE_SHIFT;
519 static bool
520 nonresident_pagelookupremove(struct vm_page *pg)
522 bool found = nonresident_lookupremove(pageobj(pg), pageidx(pg));
524 if (pg->uobject) {
525 PDPOL_EVCNT_INCR(nreslookupobj);
526 } else {
527 PDPOL_EVCNT_INCR(nreslookupanon);
529 if (found) {
530 if (pg->uobject) {
531 PDPOL_EVCNT_INCR(nresfoundobj);
532 } else {
533 PDPOL_EVCNT_INCR(nresfoundanon);
536 return found;
539 static void
540 nonresident_pagerecord(struct vm_page *pg)
542 objid_t obj = pageobj(pg);
543 off_t idx = pageidx(pg);
544 struct bucket *b = nonresident_getbucket(obj, idx);
545 nonres_cookie_t cookie = calccookie(obj, idx);
547 #if defined(DEBUG)
548 int i;
550 for (i = 0; i < BUCKETSIZE; i++) {
551 if (b->pages[i] == cookie) {
552 PDPOL_EVCNT_INCR(nresconflict);
555 #endif /* defined(DEBUG) */
557 if (pg->uobject) {
558 PDPOL_EVCNT_INCR(nresrecordobj);
559 } else {
560 PDPOL_EVCNT_INCR(nresrecordanon);
562 nonresident_rotate(b);
563 if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
564 PDPOL_EVCNT_INCR(nresoverwritten);
565 COLDTARGET_ADJ(-1);
567 b->pages[b->cur] = cookie;
568 b->cur = (b->cur + 1) % BUCKETSIZE;
571 /* ---------------------------------------- */
/*
 * check_sanity: consistency check hook.  With CLOCKPRO_DEBUG it is a
 * function with an empty body; otherwise it expands to nothing.
 */
#ifdef CLOCKPRO_DEBUG
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define	check_sanity()	/* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */
582 static void
583 clockpro_reinit(void)
586 clockpro_hashinit(uvmexp.npages);
589 static void
590 clockpro_init(void)
592 struct clockpro_state *s = &clockpro;
593 int i;
595 for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
596 pageq_init(&s->s_q[i]);
598 s->s_newqlenmax = 1;
599 s->s_coldtarget = 1;
600 uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
603 static void
604 clockpro_tune(void)
606 struct clockpro_state *s = &clockpro;
607 int coldtarget;
609 #if defined(ADAPTIVE)
610 int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
611 int coldmin = 1;
613 coldtarget = s->s_coldtarget;
614 if (coldtarget + coldadj < coldmin) {
615 coldadj = coldmin - coldtarget;
616 } else if (coldtarget + coldadj > coldmax) {
617 coldadj = coldmax - coldtarget;
619 coldtarget += coldadj;
620 #else /* defined(ADAPTIVE) */
621 coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
622 if (coldtarget < 1) {
623 coldtarget = 1;
625 #endif /* defined(ADAPTIVE) */
627 s->s_coldtarget = coldtarget;
628 s->s_newqlenmax = coldtarget / 4;
629 if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
630 s->s_newqlenmax = CLOCKPRO_NEWQMIN;
634 static void
635 clockpro_movereferencebit(struct vm_page *pg)
637 bool referenced;
639 referenced = pmap_clear_reference(pg);
640 if (referenced) {
641 pg->pqflags |= PQ_REFERENCED;
645 static void
646 clockpro_clearreferencebit(struct vm_page *pg)
649 clockpro_movereferencebit(pg);
650 pg->pqflags &= ~PQ_REFERENCED;
653 static void
654 clockpro___newqrotate(int len)
656 struct clockpro_state * const s = &clockpro;
657 pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
658 struct vm_page *pg;
660 while (pageq_len(newq) > len) {
661 pg = pageq_remove_head(newq);
662 KASSERT(pg != NULL);
663 KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
664 if ((pg->pqflags & PQ_INITIALREF) != 0) {
665 clockpro_clearreferencebit(pg);
666 pg->pqflags &= ~PQ_INITIALREF;
668 /* place at the list head */
669 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
673 static void
674 clockpro_newqrotate(void)
676 struct clockpro_state * const s = &clockpro;
678 check_sanity();
679 clockpro___newqrotate(s->s_newqlenmax);
680 check_sanity();
/*
 * clockpro_newqflush: move pages out of the new queue until at most "n"
 * remain.
 */
static void
clockpro_newqflush(int n)
{

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}
692 static void
693 clockpro_newqflushone(void)
695 struct clockpro_state * const s = &clockpro;
697 clockpro_newqflush(
698 MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
702 * our "tail" is called "list-head" in the paper.
705 static void
706 clockpro___enqueuetail(struct vm_page *pg)
708 struct clockpro_state * const s = &clockpro;
710 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
712 check_sanity();
713 #if !defined(USEONCE2)
714 clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
715 clockpro_newqrotate();
716 #else /* !defined(USEONCE2) */
717 #if defined(LISTQ)
718 KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
719 #endif /* defined(LISTQ) */
720 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
721 #endif /* !defined(USEONCE2) */
722 check_sanity();
725 static void
726 clockpro_pageenqueue(struct vm_page *pg)
728 struct clockpro_state * const s = &clockpro;
729 bool hot;
730 bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */
732 KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
733 KASSERT(mutex_owned(&uvm_pageqlock));
734 check_sanity();
735 KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
736 s->s_npages++;
737 pg->pqflags &= ~(PQ_HOT|PQ_TEST);
738 if (speculative) {
739 hot = false;
740 PDPOL_EVCNT_INCR(speculativeenqueue);
741 } else {
742 hot = nonresident_pagelookupremove(pg);
743 if (hot) {
744 COLDTARGET_ADJ(1);
749 * consider mmap'ed file:
751 * - read-ahead enqueues a page.
753 * - on the following read-ahead hit, the fault handler activates it.
755 * - finally, the userland code which caused the above fault
756 * actually accesses the page. it makes its reference bit set.
758 * we want to count the above as a single access, rather than
759 * three accesses with short reuse distances.
762 #if defined(USEONCE2)
763 pg->pqflags &= ~PQ_INITIALREF;
764 if (hot) {
765 pg->pqflags |= PQ_TEST;
767 s->s_ncold++;
768 clockpro_clearreferencebit(pg);
769 clockpro___enqueuetail(pg);
770 #else /* defined(USEONCE2) */
771 if (speculative) {
772 s->s_ncold++;
773 } else if (hot) {
774 pg->pqflags |= PQ_HOT;
775 } else {
776 pg->pqflags |= PQ_TEST;
777 s->s_ncold++;
779 clockpro___enqueuetail(pg);
780 #endif /* defined(USEONCE2) */
781 KASSERT(s->s_ncold <= s->s_npages);
784 static pageq_t *
785 clockpro_pagequeue(struct vm_page *pg)
787 struct clockpro_state * const s = &clockpro;
788 int qidx;
790 qidx = clockpro_getq(pg);
791 KASSERT(qidx != CLOCKPRO_NOQUEUE);
793 return clockpro_queue(s, qidx);
796 static void
797 clockpro_pagedequeue(struct vm_page *pg)
799 struct clockpro_state * const s = &clockpro;
800 pageq_t *q;
802 KASSERT(s->s_npages > 0);
803 check_sanity();
804 q = clockpro_pagequeue(pg);
805 pageq_remove(q, pg);
806 check_sanity();
807 clockpro_setq(pg, CLOCKPRO_NOQUEUE);
808 if ((pg->pqflags & PQ_HOT) == 0) {
809 KASSERT(s->s_ncold > 0);
810 s->s_ncold--;
812 KASSERT(s->s_npages > 0);
813 s->s_npages--;
814 check_sanity();
817 static void
818 clockpro_pagerequeue(struct vm_page *pg)
820 struct clockpro_state * const s = &clockpro;
821 int qidx;
823 qidx = clockpro_getq(pg);
824 KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
825 pageq_remove(clockpro_queue(s, qidx), pg);
826 check_sanity();
827 clockpro_setq(pg, CLOCKPRO_NOQUEUE);
829 clockpro___enqueuetail(pg);
832 static void
833 handhot_endtest(struct vm_page *pg)
836 KASSERT((pg->pqflags & PQ_HOT) == 0);
837 if ((pg->pqflags & PQ_TEST) != 0) {
838 PDPOL_EVCNT_INCR(hhotcoldtest);
839 COLDTARGET_ADJ(-1);
840 pg->pqflags &= ~PQ_TEST;
841 } else {
842 PDPOL_EVCNT_INCR(hhotcold);
846 static void
847 handhot_advance(void)
849 struct clockpro_state * const s = &clockpro;
850 struct vm_page *pg;
851 pageq_t *hotq;
852 int hotqlen;
854 clockpro_tune();
856 dump("hot called");
857 if (s->s_ncold >= s->s_coldtarget) {
858 return;
860 hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
861 again:
862 pg = pageq_first(hotq);
863 if (pg == NULL) {
864 DPRINTF("%s: HHOT TAKEOVER\n", __func__);
865 dump("hhottakeover");
866 PDPOL_EVCNT_INCR(hhottakeover);
867 #if defined(LISTQ)
868 while (/* CONSTCOND */ 1) {
869 pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
871 pg = pageq_first(coldq);
872 if (pg == NULL) {
873 clockpro_newqflushone();
874 pg = pageq_first(coldq);
875 if (pg == NULL) {
876 WARN("hhot: no page?\n");
877 return;
880 KASSERT(clockpro_pagequeue(pg) == coldq);
881 pageq_remove(coldq, pg);
882 check_sanity();
883 if ((pg->pqflags & PQ_HOT) == 0) {
884 handhot_endtest(pg);
885 clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
886 } else {
887 clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
888 break;
891 #else /* defined(LISTQ) */
892 clockpro_newqflush(0); /* XXX XXX */
893 clockpro_switchqueue();
894 hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
895 goto again;
896 #endif /* defined(LISTQ) */
899 KASSERT(clockpro_pagequeue(pg) == hotq);
902 * terminate test period of nonresident pages by cycling them.
905 cycle_target_frac += BUCKETSIZE;
906 hotqlen = pageq_len(hotq);
907 while (cycle_target_frac >= hotqlen) {
908 cycle_target++;
909 cycle_target_frac -= hotqlen;
912 if ((pg->pqflags & PQ_HOT) == 0) {
913 #if defined(LISTQ)
914 panic("cold page in hotq: %p", pg);
915 #else /* defined(LISTQ) */
916 handhot_endtest(pg);
917 goto next;
918 #endif /* defined(LISTQ) */
920 KASSERT((pg->pqflags & PQ_TEST) == 0);
921 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
922 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
925 * once we met our target,
926 * stop at a hot page so that no cold pages in test period
927 * have larger recency than any hot pages.
930 if (s->s_ncold >= s->s_coldtarget) {
931 dump("hot done");
932 return;
934 clockpro_movereferencebit(pg);
935 if ((pg->pqflags & PQ_REFERENCED) == 0) {
936 PDPOL_EVCNT_INCR(hhotunref);
937 uvmexp.pddeact++;
938 pg->pqflags &= ~PQ_HOT;
939 clockpro.s_ncold++;
940 KASSERT(s->s_ncold <= s->s_npages);
941 } else {
942 PDPOL_EVCNT_INCR(hhotref);
944 pg->pqflags &= ~PQ_REFERENCED;
945 #if !defined(LISTQ)
946 next:
947 #endif /* !defined(LISTQ) */
948 clockpro_pagerequeue(pg);
949 dump("hot");
950 goto again;
953 static struct vm_page *
954 handcold_advance(void)
956 struct clockpro_state * const s = &clockpro;
957 struct vm_page *pg;
959 for (;;) {
960 #if defined(LISTQ)
961 pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
962 #endif /* defined(LISTQ) */
963 pageq_t *coldq;
965 clockpro_newqrotate();
966 handhot_advance();
967 #if defined(LISTQ)
968 pg = pageq_first(listq);
969 if (pg != NULL) {
970 KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
971 KASSERT((pg->pqflags & PQ_TEST) == 0);
972 KASSERT((pg->pqflags & PQ_HOT) == 0);
973 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
974 pageq_remove(listq, pg);
975 check_sanity();
976 clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
977 goto gotcold;
979 #endif /* defined(LISTQ) */
980 check_sanity();
981 coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
982 pg = pageq_first(coldq);
983 if (pg == NULL) {
984 clockpro_newqflushone();
985 pg = pageq_first(coldq);
987 if (pg == NULL) {
988 DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
989 dump("hcoldtakeover");
990 PDPOL_EVCNT_INCR(hcoldtakeover);
991 KASSERT(
992 pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
993 #if defined(LISTQ)
994 KASSERT(
995 pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
996 #else /* defined(LISTQ) */
997 clockpro_switchqueue();
998 coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
999 pg = pageq_first(coldq);
1000 #endif /* defined(LISTQ) */
1002 if (pg == NULL) {
1003 WARN("hcold: no page?\n");
1004 return NULL;
1006 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
1007 if ((pg->pqflags & PQ_HOT) != 0) {
1008 PDPOL_EVCNT_INCR(hcoldhot);
1009 pageq_remove(coldq, pg);
1010 clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
1011 check_sanity();
1012 KASSERT((pg->pqflags & PQ_TEST) == 0);
1013 uvmexp.pdscans++;
1014 continue;
1016 #if defined(LISTQ)
1017 gotcold:
1018 #endif /* defined(LISTQ) */
1019 KASSERT((pg->pqflags & PQ_HOT) == 0);
1020 uvmexp.pdscans++;
1021 clockpro_movereferencebit(pg);
1022 if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
1023 KASSERT((pg->pqflags & PQ_TEST) == 0);
1024 if ((pg->pqflags & PQ_REFERENCED) != 0) {
1025 PDPOL_EVCNT_INCR(speculativehit2);
1026 pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
1027 clockpro_pagedequeue(pg);
1028 clockpro_pageenqueue(pg);
1029 continue;
1031 PDPOL_EVCNT_INCR(speculativemiss);
1033 switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
1034 case PQ_TEST:
1035 PDPOL_EVCNT_INCR(hcoldunreftest);
1036 nonresident_pagerecord(pg);
1037 goto gotit;
1038 case 0:
1039 PDPOL_EVCNT_INCR(hcoldunref);
1040 gotit:
1041 KASSERT(s->s_ncold > 0);
1042 clockpro_pagerequeue(pg); /* XXX */
1043 dump("cold done");
1044 /* XXX "pg" is still in queue */
1045 handhot_advance();
1046 goto done;
1048 case PQ_REFERENCED|PQ_TEST:
1049 PDPOL_EVCNT_INCR(hcoldreftest);
1050 s->s_ncold--;
1051 COLDTARGET_ADJ(1);
1052 pg->pqflags |= PQ_HOT;
1053 pg->pqflags &= ~PQ_TEST;
1054 break;
1056 case PQ_REFERENCED:
1057 PDPOL_EVCNT_INCR(hcoldref);
1058 pg->pqflags |= PQ_TEST;
1059 break;
1061 pg->pqflags &= ~PQ_REFERENCED;
1062 uvmexp.pdreact++;
1063 /* move to the list head */
1064 clockpro_pagerequeue(pg);
1065 dump("cold");
1067 done:;
1068 return pg;
1071 void
1072 uvmpdpol_pageactivate(struct vm_page *pg)
1075 if (!uvmpdpol_pageisqueued_p(pg)) {
1076 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
1077 pg->pqflags |= PQ_INITIALREF;
1078 clockpro_pageenqueue(pg);
1079 } else if ((pg->pqflags & PQ_SPECULATIVE)) {
1080 PDPOL_EVCNT_INCR(speculativehit1);
1081 pg->pqflags &= ~PQ_SPECULATIVE;
1082 pg->pqflags |= PQ_INITIALREF;
1083 clockpro_pagedequeue(pg);
1084 clockpro_pageenqueue(pg);
1086 pg->pqflags |= PQ_REFERENCED;
/*
 * uvmpdpol_pagedeactivate: forget that "pg" has been referenced.
 * The page is not moved between queues.
 */
void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	clockpro_clearreferencebit(pg);
}
1096 void
1097 uvmpdpol_pagedequeue(struct vm_page *pg)
1100 if (!uvmpdpol_pageisqueued_p(pg)) {
1101 return;
1103 clockpro_pagedequeue(pg);
1104 pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
1107 void
1108 uvmpdpol_pageenqueue(struct vm_page *pg)
1111 #if 1
1112 if (uvmpdpol_pageisqueued_p(pg)) {
1113 return;
1115 clockpro_clearreferencebit(pg);
1116 pg->pqflags |= PQ_SPECULATIVE;
1117 clockpro_pageenqueue(pg);
1118 #else
1119 uvmpdpol_pageactivate(pg);
1120 #endif
1123 void
1124 uvmpdpol_anfree(struct vm_anon *an)
1127 KASSERT(an->an_page == NULL);
1128 if (nonresident_lookupremove((objid_t)an, 0)) {
1129 PDPOL_EVCNT_INCR(nresanonfree);
/*
 * uvmpdpol_init: policy entry point for initialization; defers to
 * clockpro_init().
 */
void
uvmpdpol_init(void)
{

	clockpro_init();
}
/*
 * uvmpdpol_reinit: policy entry point for re-initialization; defers to
 * clockpro_reinit().
 */
void
uvmpdpol_reinit(void)
{

	clockpro_reinit();
}
1147 void
1148 uvmpdpol_estimatepageable(int *active, int *inactive)
1150 struct clockpro_state * const s = &clockpro;
1152 if (active) {
1153 *active = s->s_npages - s->s_ncold;
1155 if (inactive) {
1156 *inactive = s->s_ncold;
1160 bool
1161 uvmpdpol_pageisqueued_p(struct vm_page *pg)
1164 return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
1167 void
1168 uvmpdpol_scaninit(void)
1170 struct clockpro_scanstate * const ss = &scanstate;
1172 ss->ss_nscanned = 0;
1175 struct vm_page *
1176 uvmpdpol_selectvictim(void)
1178 struct clockpro_state * const s = &clockpro;
1179 struct clockpro_scanstate * const ss = &scanstate;
1180 struct vm_page *pg;
1182 if (ss->ss_nscanned > s->s_npages) {
1183 DPRINTF("scan too much\n");
1184 return NULL;
1186 pg = handcold_advance();
1187 ss->ss_nscanned++;
1188 return pg;
1191 static void
1192 clockpro_dropswap(pageq_t *q, int *todo)
1194 struct vm_page *pg;
1196 TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq.queue) {
1197 if (*todo <= 0) {
1198 break;
1200 if ((pg->pqflags & PQ_HOT) == 0) {
1201 continue;
1203 if ((pg->pqflags & PQ_SWAPBACKED) == 0) {
1204 continue;
1206 if (uvmpd_trydropswap(pg)) {
1207 (*todo)--;
1212 void
1213 uvmpdpol_balancequeue(int swap_shortage)
1215 struct clockpro_state * const s = &clockpro;
1216 int todo = swap_shortage;
1218 if (todo == 0) {
1219 return;
1223 * reclaim swap slots from hot pages
1226 DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);
1228 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
1229 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
1230 clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);
1232 DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
1235 bool
1236 uvmpdpol_needsscan_p(void)
1238 struct clockpro_state * const s = &clockpro;
1240 if (s->s_ncold < s->s_coldtarget) {
1241 return true;
1243 return false;
/*
 * uvmpdpol_tune: policy entry point for retuning; defers to
 * clockpro_tune().
 */
void
uvmpdpol_tune(void)
{

	clockpro_tune();
}
1253 #if !defined(PDSIM)
1255 #include <sys/sysctl.h> /* XXX SYSCTL_DESCR */
1257 void
1258 uvmpdpol_sysctlsetup(void)
1260 #if !defined(ADAPTIVE)
1261 struct clockpro_state * const s = &clockpro;
1263 uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
1264 SYSCTL_DESCR("Percentage cold target queue of the entire queue"));
1265 #endif /* !defined(ADAPTIVE) */
1268 #endif /* !defined(PDSIM) */
1270 #if defined(DDB)
1272 void clockpro_dump(void);
1274 void
1275 clockpro_dump(void)
1277 struct clockpro_state * const s = &clockpro;
1279 struct vm_page *pg;
1280 int ncold, nhot, ntest, nspeculative, ninitialref, nref;
1281 int newqlen, coldqlen, hotqlen, listqlen;
1283 newqlen = coldqlen = hotqlen = listqlen = 0;
1284 printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
1285 s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);
1287 #define INITCOUNT() \
1288 ncold = nhot = ntest = nspeculative = ninitialref = nref = 0
1290 #define COUNT(pg) \
1291 if ((pg->pqflags & PQ_HOT) != 0) { \
1292 nhot++; \
1293 } else { \
1294 ncold++; \
1295 if ((pg->pqflags & PQ_TEST) != 0) { \
1296 ntest++; \
1298 if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
1299 nspeculative++; \
1301 if ((pg->pqflags & PQ_INITIALREF) != 0) { \
1302 ninitialref++; \
1303 } else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
1304 pmap_is_referenced(pg)) { \
1305 nref++; \
1309 #define PRINTCOUNT(name) \
1310 printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
1311 "nref=%d\n", \
1312 (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)
1314 INITCOUNT();
1315 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pageq.queue) {
1316 if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
1317 printf("newq corrupt %p\n", pg);
1319 COUNT(pg)
1320 newqlen++;
1322 PRINTCOUNT("newq");
1324 INITCOUNT();
1325 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pageq.queue) {
1326 if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
1327 printf("coldq corrupt %p\n", pg);
1329 COUNT(pg)
1330 coldqlen++;
1332 PRINTCOUNT("coldq");
1334 INITCOUNT();
1335 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pageq.queue) {
1336 if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
1337 printf("hotq corrupt %p\n", pg);
1339 #if defined(LISTQ)
1340 if ((pg->pqflags & PQ_HOT) == 0) {
1341 printf("cold page in hotq: %p\n", pg);
1343 #endif /* defined(LISTQ) */
1344 COUNT(pg)
1345 hotqlen++;
1347 PRINTCOUNT("hotq");
1349 INITCOUNT();
1350 TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pageq.queue) {
1351 #if !defined(LISTQ)
1352 printf("listq %p\n", pg);
1353 #endif /* !defined(LISTQ) */
1354 if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
1355 printf("listq corrupt %p\n", pg);
1357 COUNT(pg)
1358 listqlen++;
1360 PRINTCOUNT("listq");
1362 printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
1363 newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
1364 coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
1365 hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
1366 listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
1369 #endif /* defined(DDB) */
1371 #if defined(PDSIM)
1372 #if defined(DEBUG)
1373 static void
1374 pdsim_dumpq(int qidx)
1376 struct clockpro_state * const s = &clockpro;
1377 pageq_t *q = clockpro_queue(s, qidx);
1378 struct vm_page *pg;
1380 TAILQ_FOREACH(pg, &q->q_q, pageq.queue) {
1381 DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
1382 pg->offset >> PAGE_SHIFT,
1383 (pg->pqflags & PQ_HOT) ? "H" : "",
1384 (pg->pqflags & PQ_TEST) ? "T" : "",
1385 (pg->pqflags & PQ_REFERENCED) ? "R" : "",
1386 pmap_is_referenced(pg) ? "r" : "",
1387 (pg->pqflags & PQ_INITIALREF) ? "I" : "",
1388 (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
1392 #endif /* defined(DEBUG) */
/*
 * pdsim_dump: print all queues on one line, prefixed by "id", followed
 * by the cold page count, the cold target and the pending adjustment.
 * Produces no output unless DEBUG is defined.
 */
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	const struct clockpro_state * const s = &clockpro;

	/* list, hot, cold and new queue, in this order */
	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
1412 #endif /* defined(PDSIM) */