Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sys / kern / kern_runq.c
blobbabb37534d522287ab9ebb56f6afb9dddaad203d
1 /* $NetBSD: kern_runq.c,v 1.27 2009/10/21 21:12:06 rmind Exp $ */
3 /*
4 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.27 2009/10/21 21:12:06 rmind Exp $");
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/bitops.h>
35 #include <sys/cpu.h>
36 #include <sys/idle.h>
37 #include <sys/intr.h>
38 #include <sys/kmem.h>
39 #include <sys/lwp.h>
40 #include <sys/mutex.h>
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/syscallargs.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
47 #include <sys/evcnt.h>
50 * Priority related definitions.
/*
 * Sizes of the two run-queue arrays in runqueue_t (r_ts_queue and
 * r_rt_queue).  NPRI_USER, PRI_COUNT and MAXPRI_USER are not defined in
 * this file.  NOTE(review): TS/RT presumably mean time-sharing and
 * real-time - confirm against the header that defines the priorities.
 */
52 #define PRI_TS_COUNT (NPRI_USER)
/* Every priority not counted in PRI_TS_COUNT. */
53 #define PRI_RT_COUNT (PRI_COUNT - PRI_TS_COUNT)
/* One tenth of PRI_TS_COUNT; not referenced anywhere else in this listing. */
54 #define PRI_HTS_RANGE (PRI_TS_COUNT / 10)
/* Boundary used by sched_getrq() to choose between the two queue arrays. */
56 #define PRI_HIGHEST_TS (MAXPRI_USER)
/*
 * Bits per map.
 *
 * The run-queue bitmap is an array of 32-bit words holding one bit per
 * priority.  A priority selects its word with (prio >> BITMAP_SHIFT) and
 * its bit with (BITMAP_MSB >> (prio & BITMAP_MASK)), so inside a word the
 * lowest priority sits in the most significant bit.  BITMAP_BITS must stay
 * equal to (1 << BITMAP_SHIFT).
 */
#define	BITMAP_BITS	(32)
#define	BITMAP_SHIFT	(5)
#define	BITMAP_MSB	(0x80000000U)
#define	BITMAP_MASK	(BITMAP_BITS - 1)
67 * Structures, runqueue.
/*
 * Exported constant fixed at 1.  It is not referenced anywhere else in this
 * listing.  NOTE(review): presumably read by code outside this file -
 * confirm before changing it.
 */
70 const int schedppq = 1;
/*
 * One run queue: a tail queue of LWPs.  sched_enqueue() and sched_dequeue()
 * link and unlink entries through the l_runq field of the LWP.
 */
72 typedef struct {
73 TAILQ_HEAD(, lwp) q_head;
74 } queue_t;
/*
 * Per-CPU run queue.  sched_cpuattach() allocates one of these for each
 * CPU and stores it in spc_sched_info of the CPU scheduler state.
 */
76 typedef struct {
77 /* Bitmap: one bit per priority, set while the queue of that priority is non-empty */
78 uint32_t r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
79 /* Counters */
80 u_int r_count; /* Count of the threads currently enqueued */
81 u_int r_avgcount; /* Running average of r_mcount, updated by sched_balance() */
82 u_int r_mcount; /* Count of enqueued threads without LP_BOUND set */
83 /* Runqueues: sched_getrq() selects the array and the slot for a priority */
84 queue_t r_rt_queue[PRI_RT_COUNT];
85 queue_t r_ts_queue[PRI_TS_COUNT];
86 /* Event counters */
/* Incremented in sched_catchlwp() when a thread is taken from this queue. */
87 struct evcnt r_ev_pull;
/* Incremented in sched_takecpu() on the queue picked by the CPU scan. */
88 struct evcnt r_ev_push;
/* Incremented in sched_takecpu() when the thread keeps its CPU. */
89 struct evcnt r_ev_stay;
/* Incremented in sched_takecpu() when the calling CPU is chosen. */
90 struct evcnt r_ev_localize;
91 } runqueue_t;
/*
 * Forward declarations of the file-local helpers.  The stealing and
 * balancing helpers exist only on MULTIPROCESSOR kernels.
 */
93 static void * sched_getrq(runqueue_t *, const pri_t);
94 #ifdef MULTIPROCESSOR
95 static lwp_t * sched_catchlwp(struct cpu_info *);
96 static void sched_balance(void *);
97 #endif
100 * Preemption control.
/*
 * Priority thresholds consulted by sched_enqueue() when it asks a CPU to
 * reschedule.  Both are writable through the sysctl nodes created in
 * sysctl_sched_setup().
 *
 * sched_upreempt_pri: at or above this priority (and below
 * sched_kpreempt_pri) the request type is RESCHED_IMMED.
 */
102 int sched_upreempt_pri = PRI_KERNEL;
/*
 * sched_kpreempt_pri: at or above this priority the request type is
 * RESCHED_KPREEMPT.  With DEBUG the threshold is 0.  Without
 * __HAVE_PREEMPTION it is 1000.  NOTE(review): 1000 is presumably above
 * every valid priority so the branch is never taken - confirm against
 * PRI_COUNT.
 */
103 #ifdef __HAVE_PREEMPTION
104 # ifdef DEBUG
105 int sched_kpreempt_pri = 0;
106 # else
107 int sched_kpreempt_pri = PRI_USER_RT;
108 # endif
109 #else
110 int sched_kpreempt_pri = 1000;
111 #endif
114 * Migration and balancing.
/*
 * Tunables and shared state of the migration and balancing code.  The
 * initial values are assigned in runq_init(); the first three are also
 * exported by sysctl_sched_setup().
 */
/* lwp_cache_hot() treats an LWP as cache-hot if it last ran within this many ticks. */
116 static u_int cacheht_time; /* Cache hotness time */
/* sched_catchlwp() gives up when the victim has fewer migratable LWPs than this. */
117 static u_int min_catch; /* Minimal LWP count for catching */
/* Delay, in ticks, between two runs of the sched_balance() callout. */
118 static u_int balance_period; /* Balance period */
/* CPU with the highest r_avgcount at the last sched_balance() run. */
119 static struct cpu_info *worker_ci; /* Victim CPU */
120 #ifdef MULTIPROCESSOR
121 static struct callout balance_ch; /* Callout of balancer */
122 #endif
/*
 * runq_init: assign the initial values of the balancing tunables and, on
 * MULTIPROCESSOR kernels, start the periodic balancer callout.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
124 void
125 runq_init(void)
128 /* Balancing */
/* Until the balancer has run, the calling CPU is the nominal victim. */
129 worker_ci = curcpu();
/* Values are given in milliseconds and converted with mstohz(). */
130 cacheht_time = mstohz(3); /* ~3 ms */
131 balance_period = mstohz(300); /* ~300 ms */
133 /* Minimal count of LWPs for catching */
134 min_catch = 1;
136 /* Initialize balancing callout and run it */
137 #ifdef MULTIPROCESSOR
138 callout_init(&balance_ch, CALLOUT_MPSAFE);
/* A NULL argument makes sched_balance() re-arm the callout on each run. */
139 callout_setfunc(&balance_ch, sched_balance, NULL);
140 callout_schedule(&balance_ch, balance_period);
141 #endif
/*
 * sched_cpuattach: set up the scheduler state of CPU ci.
 *
 * Creates the LWP lock if it is missing, then, unless the run-queue mutex
 * already exists, allocates and initializes the run queue and attaches its
 * four event counters.
 *
 * NOTE(review): brace-only lines are missing from this listing; the three
 * leading if statements appear to be separate, not nested - confirm
 * against the upstream file.
 */
144 void
145 sched_cpuattach(struct cpu_info *ci)
147 runqueue_t *ci_rq;
148 void *rq_ptr;
149 u_int i, size;
150 char *cpuname;
152 if (ci->ci_schedstate.spc_lwplock == NULL) {
153 ci->ci_schedstate.spc_lwplock =
154 mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
156 if (ci == lwp0.l_cpu) {
157 /* Initialize the scheduler structure of the primary LWP */
158 lwp0.l_mutex = ci->ci_schedstate.spc_lwplock;
/* A non-NULL spc_mutex marks a CPU that has been through here before. */
160 if (ci->ci_schedstate.spc_mutex != NULL) {
161 /* Already initialized. */
162 return;
165 /* Allocate the run queue */
/*
 * One extra coherency_unit is requested so that the pointer can be rounded
 * up to a coherency_unit boundary below.  The unaligned pointer rq_ptr is
 * not stored anywhere by this function.
 */
166 size = roundup2(sizeof(runqueue_t), coherency_unit) + coherency_unit;
167 rq_ptr = kmem_zalloc(size, KM_SLEEP);
/* NOTE(review): KM_SLEEP presumably cannot fail, making this check defensive - confirm in kmem(9). */
168 if (rq_ptr == NULL) {
169 panic("sched_cpuattach: could not allocate the runqueue");
171 ci_rq = (void *)(roundup2((uintptr_t)(rq_ptr), coherency_unit));
173 /* Initialize run queues */
174 ci->ci_schedstate.spc_mutex =
175 mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
176 for (i = 0; i < PRI_RT_COUNT; i++)
177 TAILQ_INIT(&ci_rq->r_rt_queue[i].q_head);
178 for (i = 0; i < PRI_TS_COUNT; i++)
179 TAILQ_INIT(&ci_rq->r_ts_queue[i].q_head);
181 ci->ci_schedstate.spc_sched_info = ci_rq;
/*
 * Name shared by the four event counters.  The 8-byte buffer is never
 * freed in this function.  NOTE(review): presumably the evcnt code keeps
 * the pointer for the lifetime of the counters - confirm in evcnt(9).
 */
183 cpuname = kmem_alloc(8, KM_SLEEP);
184 snprintf(cpuname, 8, "cpu%d", cpu_index(ci));
186 evcnt_attach_dynamic(&ci_rq->r_ev_pull, EVCNT_TYPE_MISC, NULL,
187 cpuname, "runqueue pull");
188 evcnt_attach_dynamic(&ci_rq->r_ev_push, EVCNT_TYPE_MISC, NULL,
189 cpuname, "runqueue push");
190 evcnt_attach_dynamic(&ci_rq->r_ev_stay, EVCNT_TYPE_MISC, NULL,
191 cpuname, "runqueue stay");
192 evcnt_attach_dynamic(&ci_rq->r_ev_localize, EVCNT_TYPE_MISC, NULL,
193 cpuname, "runqueue localize");
197 * Control of the runqueue.
/*
 * sched_getrq: return the queue head that holds LWPs of priority prio in
 * run queue ci_rq.
 *
 * Priorities up to and including PRI_HIGHEST_TS index r_ts_queue directly.
 * Higher priorities index r_rt_queue, rebased so that PRI_HIGHEST_TS + 1
 * maps to slot 0.  The head is returned as void * and callers assign it to
 * a pointer to an anonymous TAILQ_HEAD.
 */
200 static inline void *
201 sched_getrq(runqueue_t *ci_rq, const pri_t prio)
204 KASSERT(prio < PRI_COUNT);
205 return (prio <= PRI_HIGHEST_TS) ?
206 &ci_rq->r_ts_queue[prio].q_head :
207 &ci_rq->r_rt_queue[prio - PRI_HIGHEST_TS - 1].q_head;
/*
 * sched_enqueue: append LWP l to the run queue of l->l_cpu.
 *
 * l:     the LWP; it must be locked by the spc_mutex of its CPU (asserted).
 * swtch: true when called on a context switch.  In that case the run time
 *        since l_rticks is added to l_rticksum and no reschedule request is
 *        raised.
 *
 * Side effects: sets the bitmap bit of the priority if its queue was
 * empty, increments r_count (and r_mcount unless LP_BOUND is set), raises
 * spc_maxpriority if needed, calls sched_newts() and may call
 * cpu_need_resched() on the target CPU.
 *
 * NOTE(review): brace-only lines are missing from this listing.  The
 * insertion after the bitmap update is presumably outside the
 * TAILQ_EMPTY() block - confirm against the upstream file.
 */
210 void
211 sched_enqueue(struct lwp *l, bool swtch)
213 runqueue_t *ci_rq;
214 struct schedstate_percpu *spc;
215 TAILQ_HEAD(, lwp) *q_head;
216 const pri_t eprio = lwp_eprio(l);
217 struct cpu_info *ci;
218 int type;
220 ci = l->l_cpu;
221 spc = &ci->ci_schedstate;
222 ci_rq = spc->spc_sched_info;
223 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
225 /* Update the last run time on switch */
226 if (__predict_true(swtch == true))
227 l->l_rticksum += (hardclock_ticks - l->l_rticks);
/* Not a switch: stamp the LWP only if it has no last-run time yet. */
228 else if (l->l_rticks == 0)
229 l->l_rticks = hardclock_ticks;
231 /* Enqueue the thread */
232 q_head = sched_getrq(ci_rq, eprio);
233 if (TAILQ_EMPTY(q_head)) {
234 u_int i;
235 uint32_t q;
237 /* Mark bit */
/* Word index is eprio / 32; the bit is counted down from the MSB. */
238 i = eprio >> BITMAP_SHIFT;
239 q = BITMAP_MSB >> (eprio & BITMAP_MASK);
240 KASSERT((ci_rq->r_bitmap[i] & q) == 0);
241 ci_rq->r_bitmap[i] |= q;
243 TAILQ_INSERT_TAIL(q_head, l, l_runq);
244 ci_rq->r_count++;
/* r_mcount tracks only LWPs that are not bound to their CPU. */
245 if ((l->l_pflag & LP_BOUND) == 0)
246 ci_rq->r_mcount++;
249 * Update the value of highest priority in the runqueue,
250 * if priority of this thread is higher.
252 if (eprio > spc->spc_maxpriority)
253 spc->spc_maxpriority = eprio;
/* Hook implemented outside this file. */
255 sched_newts(l);
258 * Wake the chosen CPU or cause a preemption if the newly
259 * enqueued thread has higher priority. Don't cause a
260 * preemption if the thread is yielding (swtch).
262 if (!swtch && eprio > spc->spc_curpriority) {
/* Request type is chosen from the two global thresholds, strongest first. */
263 if (eprio >= sched_kpreempt_pri)
264 type = RESCHED_KPREEMPT;
265 else if (eprio >= sched_upreempt_pri)
266 type = RESCHED_IMMED;
267 else
268 type = RESCHED_LAZY;
269 cpu_need_resched(ci, type);
/*
 * sched_dequeue: remove LWP l from the run queue of l->l_cpu.
 *
 * l must be locked by the spc_mutex of its CPU (asserted).  Clears
 * spc_migrating if it points at l, decrements r_count (and r_mcount unless
 * LP_BOUND is set) and unlinks the LWP.  If the queue of that priority
 * became empty, its bitmap bit is cleared; if that priority was also
 * spc_maxpriority, the bitmap is scanned downwards for the new maximum,
 * which falls back to 0 when no bit is set.
 *
 * NOTE(review): brace-only lines are missing from this listing.  Since i
 * and q are declared inside the TAILQ_EMPTY() block, everything after the
 * bit is cleared must still be inside that block.
 */
273 void
274 sched_dequeue(struct lwp *l)
276 runqueue_t *ci_rq;
277 TAILQ_HEAD(, lwp) *q_head;
278 struct schedstate_percpu *spc;
279 const pri_t eprio = lwp_eprio(l);
281 spc = & l->l_cpu->ci_schedstate;
282 ci_rq = spc->spc_sched_info;
283 KASSERT(lwp_locked(l, spc->spc_mutex));
285 KASSERT(eprio <= spc->spc_maxpriority);
286 KASSERT(ci_rq->r_bitmap[eprio >> BITMAP_SHIFT] != 0);
287 KASSERT(ci_rq->r_count > 0);
/* A dequeued LWP can no longer be the pending migration of this CPU. */
289 if (spc->spc_migrating == l)
290 spc->spc_migrating = NULL;
292 ci_rq->r_count--;
293 if ((l->l_pflag & LP_BOUND) == 0)
294 ci_rq->r_mcount--;
296 q_head = sched_getrq(ci_rq, eprio);
297 TAILQ_REMOVE(q_head, l, l_runq);
298 if (TAILQ_EMPTY(q_head)) {
299 u_int i;
300 uint32_t q;
302 /* Unmark bit */
303 i = eprio >> BITMAP_SHIFT;
304 q = BITMAP_MSB >> (eprio & BITMAP_MASK);
305 KASSERT((ci_rq->r_bitmap[i] & q) != 0);
306 ci_rq->r_bitmap[i] &= ~q;
309 * Update the value of highest priority in the runqueue, in a
310 * case it was a last thread in the queue of highest priority.
312 if (eprio != spc->spc_maxpriority)
313 return;
/*
 * Walk the words from the one just modified down to word 0.  ffs() gives
 * the 1-based position of the least significant set bit; because the
 * lowest priority of a word sits in the MSB, that bit is the highest
 * priority present in the word.
 */
315 do {
316 if (ci_rq->r_bitmap[i] != 0) {
317 q = ffs(ci_rq->r_bitmap[i]);
318 spc->spc_maxpriority =
319 (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
320 return;
/* Post-decrement: the body still runs for i == 0 before the loop ends. */
322 } while (i--);
324 /* If not found - set the lowest value */
325 spc->spc_maxpriority = 0;
330 * Migration and balancing.
333 #ifdef MULTIPROCESSOR
/*
 * lwp_cache_hot: estimate whether LWP l is still cache-hot.
 *
 * Returns false if the LWP has a non-zero l_slptime or has no last-run
 * stamp (l_rticks == 0).  Otherwise returns true when no more than
 * cacheht_time ticks have passed since l_rticks.
 */
336 static inline bool
337 lwp_cache_hot(const struct lwp *l)
340 if (__predict_false(l->l_slptime || l->l_rticks == 0))
341 return false;
343 return (hardclock_ticks - l->l_rticks <= cacheht_time);
/*
 * sched_migratable: check if LWP l can migrate to the chosen CPU ci.
 *
 * Returns false for a CPU flagged SPCF_OFFLINE.  For an LWP with
 * LW_AFFINITY set, the answer is whether ci is a member of l->l_affinity.
 * Otherwise the LWP and the CPU must be in the same processor set.
 * The LWP must be locked (asserted).
 */
347 static inline bool
348 sched_migratable(const struct lwp *l, struct cpu_info *ci)
350 const struct schedstate_percpu *spc = &ci->ci_schedstate;
/* __UNCONST only strips the qualifier for the assertion helper. */
351 KASSERT(lwp_locked(__UNCONST(l), NULL));
353 /* CPU is offline */
354 if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
355 return false;
357 /* Affinity bind */
358 if (__predict_false(l->l_flag & LW_AFFINITY))
359 return kcpuset_isset(cpu_index(ci), l->l_affinity);
361 /* Processor-set */
362 return (spc->spc_psid == l->l_psid);
366 * Estimate the migration of LWP to the other CPU.
367 * Take and return the CPU, if migration is needed.
/*
 * sched_takecpu: choose the CPU on which LWP l should run (MULTIPROCESSOR
 * version).  l must be locked (asserted).
 *
 * The decision is made in this order:
 *  1. LP_BOUND set: keep l->l_cpu.
 *  2. Same processor set as its CPU, and that run queue is empty: stay.
 *  3. Same processor set, not LSIDL, cache-hot and eprio not below the
 *     current priority of its CPU: stay.
 *  4. eprio above the current priority of the calling CPU, and the LWP may
 *     migrate there: take the calling CPU.
 *  5. Otherwise scan all CPUs, starting at l->l_cpu, for the one with the
 *     lowest priority that the LWP may migrate to.
 *
 * Returns the chosen CPU; the matching event counter (stay, localize or
 * push) is incremented.  The function only picks a CPU and does not move
 * the LWP.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
369 struct cpu_info *
370 sched_takecpu(struct lwp *l)
372 struct cpu_info *ci, *tci, *first, *next;
373 struct schedstate_percpu *spc;
374 runqueue_t *ci_rq, *ici_rq;
375 pri_t eprio, lpri, pri;
377 KASSERT(lwp_locked(l, NULL));
379 /* If thread is strictly bound, do not estimate other CPUs */
380 ci = l->l_cpu;
381 if (l->l_pflag & LP_BOUND)
382 return ci;
384 spc = &ci->ci_schedstate;
385 ci_rq = spc->spc_sched_info;
387 /* Make sure that thread is in appropriate processor-set */
388 if (__predict_true(spc->spc_psid == l->l_psid)) {
389 /* If CPU of this thread is idling - run there */
390 if (ci_rq->r_count == 0) {
391 ci_rq->r_ev_stay.ev_count++;
392 return ci;
394 /* Stay if thread is cache-hot */
395 eprio = lwp_eprio(l);
396 if (__predict_true(l->l_stat != LSIDL) &&
397 lwp_cache_hot(l) && eprio >= spc->spc_curpriority) {
398 ci_rq->r_ev_stay.ev_count++;
399 return ci;
401 } else {
/* Wrong processor set: staying is not an option, only fetch the priority. */
402 eprio = lwp_eprio(l);
405 /* Run on current CPU if priority of thread is higher */
406 ci = curcpu();
407 spc = &ci->ci_schedstate;
408 if (eprio > spc->spc_curpriority && sched_migratable(l, ci)) {
409 ci_rq = spc->spc_sched_info;
410 ci_rq->r_ev_localize.ev_count++;
411 return ci;
415 * Look for the CPU with the lowest priority thread. In case of
416 * equal priority, choose the CPU with the fewest of threads.
/*
 * tci is the best candidate so far and ci_rq its run queue; both start at
 * the CPU of the LWP.  lpri starts at PRI_COUNT.
 */
418 first = l->l_cpu;
419 ci = first;
420 tci = first;
421 lpri = PRI_COUNT;
422 do {
/* next is fetched first so that continue still advances the walk. */
423 next = CIRCLEQ_LOOP_NEXT(&cpu_queue, ci, ci_data.cpu_qchain);
424 spc = &ci->ci_schedstate;
425 ici_rq = spc->spc_sched_info;
/* Load of a CPU: the higher of its running and its best queued priority. */
426 pri = max(spc->spc_curpriority, spc->spc_maxpriority);
427 if (pri > lpri)
428 continue;
/* Tie on priority: keep the current best if it has fewer threads. */
430 if (pri == lpri && ci_rq->r_count < ici_rq->r_count)
431 continue;
433 if (!sched_migratable(l, ci))
434 continue;
436 lpri = pri;
437 tci = ci;
438 ci_rq = ici_rq;
439 } while (ci = next, ci != first);
/* Counted as a push even when the scan ends on the original CPU. */
441 ci_rq = tci->ci_schedstate.spc_sched_info;
442 ci_rq->r_ev_push.ev_count++;
444 return tci;
448 * Tries to catch an LWP from the runqueue of another CPU.
/*
 * sched_catchlwp: try to move one LWP from the run queue of CPU ci to the
 * run queue of the calling CPU.
 *
 * Only the queue of the highest priority of ci (spc_maxpriority) is
 * searched.  The first LWP that is not LP_BOUND, is not cache-hot and may
 * migrate to the calling CPU is dequeued, re-targeted and enqueued locally.
 *
 * Returns the LWP that was moved, or NULL if none qualified.
 *
 * Locking: every path releases the lock of ci, through spc_unlock() on
 * failure and lwp_unlock_to() on success.  NOTE(review): so the caller is
 * presumably expected to hold the run-queue locks of both CPUs on entry -
 * confirm with the callers sched_idle() and sched_nextlwp().
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
450 static struct lwp *
451 sched_catchlwp(struct cpu_info *ci)
453 struct cpu_info *curci = curcpu();
454 struct schedstate_percpu *spc, *curspc;
455 TAILQ_HEAD(, lwp) *q_head;
456 runqueue_t *ci_rq;
457 struct lwp *l;
459 curspc = &curci->ci_schedstate;
460 spc = &ci->ci_schedstate;
461 KASSERT(curspc->spc_psid == spc->spc_psid);
463 ci_rq = spc->spc_sched_info;
/* Too few migratable LWPs on the victim: do not steal. */
464 if (ci_rq->r_mcount < min_catch) {
465 spc_unlock(ci);
466 return NULL;
469 /* Take the highest priority thread */
470 q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
471 l = TAILQ_FIRST(q_head);
473 for (;;) {
474 /* Check the first and next result from the queue */
475 if (l == NULL) {
476 break;
478 KASSERT(l->l_stat == LSRUN);
480 /* Look for threads that are allowed to migrate */
481 if ((l->l_pflag & LP_BOUND) || lwp_cache_hot(l) ||
482 !sched_migratable(l, curci)) {
483 l = TAILQ_NEXT(l, l_runq);
484 continue;
487 /* Grab the thread, and move to the local run queue */
488 sched_dequeue(l);
491 * If LWP is still context switching, we may need to
492 * spin-wait before changing its CPU.
494 if (__predict_false(l->l_ctxswtch != 0)) {
495 u_int count;
496 count = SPINLOCK_BACKOFF_MIN;
497 while (l->l_ctxswtch)
498 SPINLOCK_BACKOFF(count);
500 l->l_cpu = curci;
/* The pull is accounted on the queue the LWP was taken from. */
501 ci_rq->r_ev_pull.ev_count++;
/* NOTE(review): presumably drops the old lock and makes the local mutex the lock of l - confirm. */
502 lwp_unlock_to(l, curspc->spc_mutex);
503 sched_enqueue(l, false);
504 return l;
/* Reached only through the break above, so l is NULL here. */
506 spc_unlock(ci);
508 return l;
512 * Periodic calculations for balancing.
/*
 * sched_balance: refresh the per-CPU load averages and select the victim
 * CPU for stealing.
 *
 * For every CPU, r_avgcount becomes the mean of its previous value and the
 * current r_mcount.  worker_ci is set to the CPU with the highest
 * resulting average, or to the calling CPU if every average is zero.
 *
 * nocallout: NULL when invoked from the callout, which is then re-armed
 *            for balance_period ticks.  sched_idle() and sched_nextlwp()
 *            pass a non-NULL pointer, so their calls do not re-arm it.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
514 static void
515 sched_balance(void *nocallout)
517 struct cpu_info *ci, *hci;
518 runqueue_t *ci_rq;
519 CPU_INFO_ITERATOR cii;
520 u_int highest;
522 hci = curcpu();
523 highest = 0;
525 /* Do the counting without taking the run-queue locks */
526 for (CPU_INFO_FOREACH(cii, ci)) {
527 ci_rq = ci->ci_schedstate.spc_sched_info;
529 /* Average count of the threads */
530 ci_rq->r_avgcount = (ci_rq->r_avgcount + ci_rq->r_mcount) >> 1;
532 /* Look for CPU with the highest average */
533 if (ci_rq->r_avgcount > highest) {
534 hci = ci;
535 highest = ci_rq->r_avgcount;
539 /* Update the worker */
540 worker_ci = hci;
542 if (nocallout == NULL)
543 callout_schedule(&balance_ch, balance_period);
547 * Called from each CPU's idle loop.
/*
 * sched_idle: idle-time work of the scheduler (MULTIPROCESSOR version).
 *
 * Part 1: if this CPU has a pending migration (spc_migrating), move that
 * LWP to its l_target_cpu.  Both run queues are locked for the move.
 *
 * Part 2 (label no_migration): if this CPU is online and its run queue is
 * empty, rerun the balancer and try to steal an LWP from worker_ci.
 *
 * Lock order in part 1: the lock of the CPU with the lower address is
 * taken first.  When the target has the lower address, the target lock is
 * only tried; on failure the local lock is dropped, both are re-taken in
 * order, and the state is re-examined.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
549 void
550 sched_idle(void)
552 struct cpu_info *ci = curcpu(), *tci = NULL;
553 struct schedstate_percpu *spc, *tspc;
554 runqueue_t *ci_rq;
/* dlock records whether the lock of tci is currently held. */
555 bool dlock = false;
557 /* Check if there is a migrating LWP */
558 spc = &ci->ci_schedstate;
/* Unlocked peek; the field is re-read under the lock inside the loop. */
559 if (spc->spc_migrating == NULL)
560 goto no_migration;
562 spc_lock(ci);
563 for (;;) {
564 struct lwp *l;
566 l = spc->spc_migrating;
567 if (l == NULL)
568 break;
571 * If second attempt, and target CPU has changed,
572 * drop the old lock.
574 if (dlock == true && tci != l->l_target_cpu) {
575 KASSERT(tci != NULL);
576 spc_unlock(tci);
577 dlock = false;
581 * Nothing to do if destination has changed to the
582 * local CPU, or migration was done by other CPU.
584 tci = l->l_target_cpu;
585 if (tci == NULL || tci == ci) {
586 spc->spc_migrating = NULL;
587 l->l_target_cpu = NULL;
588 break;
590 tspc = &tci->ci_schedstate;
593 * Double-lock the runqueues.
594 * We do that only once.
596 if (dlock == false) {
597 dlock = true;
598 if (ci < tci) {
599 spc_lock(tci);
600 } else if (!mutex_tryenter(tspc->spc_mutex)) {
/* Wrong order for a blocking acquire: release, then take both in order. */
601 spc_unlock(ci);
602 spc_lock(tci);
603 spc_lock(ci);
604 /* Check the situation again.. */
605 continue;
609 /* Migrate the thread */
610 KASSERT(l->l_stat == LSRUN);
611 spc->spc_migrating = NULL;
612 l->l_target_cpu = NULL;
613 sched_dequeue(l);
614 l->l_cpu = tci;
615 lwp_setlock(l, tspc->spc_mutex);
616 sched_enqueue(l, false);
617 break;
619 if (dlock == true) {
620 KASSERT(tci != NULL);
621 spc_unlock(tci);
623 spc_unlock(ci);
625 no_migration:
626 ci_rq = spc->spc_sched_info;
/* Steal only when this CPU is online and has nothing queued. */
627 if ((spc->spc_flags & SPCF_OFFLINE) != 0 || ci_rq->r_count != 0) {
628 return;
631 /* Reset the counter, and call the balancer */
632 ci_rq->r_avgcount = 0;
/* The non-NULL argument keeps sched_balance() from re-arming its callout. */
633 sched_balance(ci);
634 tci = worker_ci;
635 tspc = &tci->ci_schedstate;
636 if (ci == tci || spc->spc_psid != tspc->spc_psid)
637 return;
638 spc_dlock(ci, tci);
/* sched_catchlwp() releases the lock of tci on every path; only ci is left. */
639 (void)sched_catchlwp(tci);
640 spc_unlock(ci);
643 #else
/*
 * sched_takecpu: version for kernels built without MULTIPROCESSOR.  No
 * CPU choice is made; the LWP keeps the CPU recorded in l->l_cpu.
 */
645 struct cpu_info *
646 sched_takecpu(struct lwp *l)
649 return l->l_cpu;
/*
 * sched_idle: version for kernels built without MULTIPROCESSOR.  No
 * statements are visible in this listing.  NOTE(review): the body is
 * presumably empty, with its brace lines lost in extraction - confirm.
 */
652 void
653 sched_idle(void)
657 #endif /* MULTIPROCESSOR */
660 * Scheduling statistics and balancing.
/*
 * sched_lwp_stats: periodic per-LWP accounting.  l must be locked
 * (asserted).
 *
 * - Increments l_slptime while the LWP is sleeping, stopped or suspended.
 * - Sets LW_BATCH when the run-time sum exceeds the sleep-time sum and
 *   clears it otherwise.  The local batch value stays non-zero only if
 *   the flag was already set on entry.
 * - For such an LWP with no recorded sleep at all, on the second
 *   consecutive check (l_slpticks == 0), picks a target CPU through
 *   sched_takecpu() and clears LW_BATCH.
 * - Resets both time sums and calls sched_pstats_hook() with the local
 *   batch value.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
662 void
663 sched_lwp_stats(struct lwp *l)
665 int batch;
667 KASSERT(lwp_locked(l, NULL));
669 /* Update sleep time */
670 if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
671 l->l_stat == LSSUSPENDED)
672 l->l_slptime++;
675 * Set that thread is more CPU-bound, if sum of run time exceeds the
676 * sum of sleep time. Check if thread is CPU-bound a first time.
678 batch = (l->l_rticksum > l->l_slpticksum);
679 if (batch != 0) {
/* First time over the threshold: set the flag but report batch as 0. */
680 if ((l->l_flag & LW_BATCH) == 0)
681 batch = 0;
682 l->l_flag |= LW_BATCH;
683 } else
684 l->l_flag &= ~LW_BATCH;
687 * If thread is CPU-bound and never sleeps, it would occupy the CPU.
688 * In such case reset the value of last sleep, and check it later, if
689 * it is still zero - perform the migration, unmark the batch flag.
691 if (batch && (l->l_slptime + l->l_slpticksum) == 0) {
692 if (l->l_slpticks == 0) {
/* A target is chosen only if none is pending and the LWP is runnable or running. */
693 if (l->l_target_cpu == NULL &&
694 (l->l_stat == LSRUN || l->l_stat == LSONPROC)) {
695 struct cpu_info *ci = sched_takecpu(l);
/* NULL means no migration is requested. */
696 l->l_target_cpu = (ci != l->l_cpu) ? ci : NULL;
698 l->l_flag &= ~LW_BATCH;
699 } else {
700 l->l_slpticks = 0;
704 /* Reset the time sums */
705 l->l_slpticksum = 0;
706 l->l_rticksum = 0;
708 /* Scheduler-specific hook */
709 sched_pstats_hook(l, batch);
713 * Scheduler mill.
/*
 * sched_nextlwp: choose the next LWP to run on the calling CPU.
 *
 * Returns NULL when a migration is pending on this CPU, when the CPU is
 * offline with an empty queue, or when nothing could be obtained.
 * On MULTIPROCESSOR kernels an empty local queue triggers a balancer run
 * and an attempt to steal from worker_ci; the victim lock is only tried,
 * never waited for.  Otherwise returns the first LWP in the queue of
 * spc_maxpriority, after calling sched_oncpu() and stamping l_rticks.
 *
 * The chosen LWP is not removed from the run queue here.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
715 struct lwp *
716 sched_nextlwp(void)
718 struct cpu_info *ci = curcpu();
719 struct schedstate_percpu *spc;
720 TAILQ_HEAD(, lwp) *q_head;
721 runqueue_t *ci_rq;
722 struct lwp *l;
724 /* Return to idle LWP if there is a migrating thread */
725 spc = &ci->ci_schedstate;
726 if (__predict_false(spc->spc_migrating != NULL))
727 return NULL;
728 ci_rq = spc->spc_sched_info;
730 #ifdef MULTIPROCESSOR
731 /* If runqueue is empty, try to catch some thread from other CPU */
732 if (__predict_false(ci_rq->r_count == 0)) {
733 struct schedstate_percpu *cspc;
734 struct cpu_info *cci;
736 /* Offline CPUs should not perform this, however */
737 if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
738 return NULL;
740 /* Reset the counter, and call the balancer */
741 ci_rq->r_avgcount = 0;
/* The non-NULL argument keeps sched_balance() from re-arming its callout. */
742 sched_balance(ci);
743 cci = worker_ci;
744 cspc = &cci->ci_schedstate;
/* Give up if the victim is this CPU, is in another set, or its lock is busy. */
745 if (ci == cci || spc->spc_psid != cspc->spc_psid ||
746 !mutex_tryenter(cci->ci_schedstate.spc_mutex))
747 return NULL;
/* sched_catchlwp() releases the victim lock taken just above. */
748 return sched_catchlwp(cci);
750 #else
751 if (__predict_false(ci_rq->r_count == 0))
752 return NULL;
753 #endif
755 /* Take the highest priority thread */
756 KASSERT(ci_rq->r_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
757 q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
758 l = TAILQ_FIRST(q_head);
759 KASSERT(l != NULL);
/* Hook implemented outside this file. */
761 sched_oncpu(l);
762 l->l_rticks = hardclock_ticks;
764 return l;
/*
 * sched_curcpu_runnable_p: report whether the calling CPU has work.
 *
 * Returns true if the run queue of this CPU is non-empty.  On kernels
 * without __HAVE_FAST_SOFTINTS it also returns true when the cpu_softints
 * field of the CPU is non-zero.  Kernel preemption is disabled while the
 * per-CPU data is read and re-enabled on every return path.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
767 bool
768 sched_curcpu_runnable_p(void)
770 const struct cpu_info *ci;
771 const struct schedstate_percpu *spc;
772 const runqueue_t *ci_rq;
773 bool rv;
775 kpreempt_disable();
776 ci = curcpu();
777 spc = &ci->ci_schedstate;
778 ci_rq = spc->spc_sched_info;
780 #ifndef __HAVE_FAST_SOFTINTS
781 if (ci->ci_data.cpu_softints) {
782 kpreempt_enable();
783 return true;
785 #endif
787 rv = (ci_rq->r_count != 0) ? true : false;
788 kpreempt_enable();
790 return rv;
794 * Sysctl nodes and initialization.
/*
 * sysctl_sched_setup: create the kern.sched sysctl node and six
 * read-write integer leaves below it: cacheht_time, balance_period,
 * min_catch, timesoftints, kpreempt_pri and upreempt_pri.
 *
 * Returns early, creating no leaves, if the sched node could not be
 * created.  Return values of the other sysctl_createv() calls are not
 * checked.
 *
 * NOTE(review): cacheht_time, balance_period and min_catch are declared
 * u_int in this file but exported as CTLTYPE_INT - confirm this is
 * intended.  softint_timing is defined outside this file.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
797 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
799 const struct sysctlnode *node = NULL;
/* Make sure the top-level kern node exists. */
801 sysctl_createv(clog, 0, NULL, NULL,
802 CTLFLAG_PERMANENT,
803 CTLTYPE_NODE, "kern", NULL,
804 NULL, 0, NULL, 0,
805 CTL_KERN, CTL_EOL);
/* Create kern.sched and remember it as the parent of the leaves. */
806 sysctl_createv(clog, 0, NULL, &node,
807 CTLFLAG_PERMANENT,
808 CTLTYPE_NODE, "sched",
809 SYSCTL_DESCR("Scheduler options"),
810 NULL, 0, NULL, 0,
811 CTL_KERN, CTL_CREATE, CTL_EOL);
813 if (node == NULL)
814 return;
816 sysctl_createv(clog, 0, &node, NULL,
817 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
818 CTLTYPE_INT, "cacheht_time",
819 SYSCTL_DESCR("Cache hotness time (in ticks)"),
820 NULL, 0, &cacheht_time, 0,
821 CTL_CREATE, CTL_EOL);
822 sysctl_createv(clog, 0, &node, NULL,
823 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
824 CTLTYPE_INT, "balance_period",
825 SYSCTL_DESCR("Balance period (in ticks)"),
826 NULL, 0, &balance_period, 0,
827 CTL_CREATE, CTL_EOL);
828 sysctl_createv(clog, 0, &node, NULL,
829 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
830 CTLTYPE_INT, "min_catch",
831 SYSCTL_DESCR("Minimal count of threads for catching"),
832 NULL, 0, &min_catch, 0,
833 CTL_CREATE, CTL_EOL);
834 sysctl_createv(clog, 0, &node, NULL,
835 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
836 CTLTYPE_INT, "timesoftints",
837 SYSCTL_DESCR("Track CPU time for soft interrupts"),
838 NULL, 0, &softint_timing, 0,
839 CTL_CREATE, CTL_EOL);
840 sysctl_createv(clog, 0, &node, NULL,
841 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
842 CTLTYPE_INT, "kpreempt_pri",
843 SYSCTL_DESCR("Minimum priority to trigger kernel preemption"),
844 NULL, 0, &sched_kpreempt_pri, 0,
845 CTL_CREATE, CTL_EOL);
846 sysctl_createv(clog, 0, &node, NULL,
847 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
848 CTLTYPE_INT, "upreempt_pri",
849 SYSCTL_DESCR("Minimum priority to trigger user preemption"),
850 NULL, 0, &sched_upreempt_pri, 0,
851 CTL_CREATE, CTL_EOL);
855 * Debugging.
858 #ifdef DDB
/*
 * sched_print_runqueue: debugger (DDB) dump of the scheduler state.
 *
 * pr: printf-style output function supplied by the caller.
 *
 * For every CPU, prints the current LWP, the run-queue counters, the
 * highest queued priority, the pending migration and each bitmap word.
 * Then prints one line per LWP of every process on allproc, skipping
 * entries flagged PK_MARKER.  No locks are taken in this function.
 *
 * NOTE(review): brace-only lines are missing from this listing.
 */
860 void
861 sched_print_runqueue(void (*pr)(const char *, ...)
862 __attribute__((__format__(__printf__,1,2))))
864 runqueue_t *ci_rq;
865 struct cpu_info *ci, *tci;
866 struct schedstate_percpu *spc;
867 struct lwp *l;
868 struct proc *p;
869 CPU_INFO_ITERATOR cii;
871 for (CPU_INFO_FOREACH(cii, ci)) {
872 int i;
874 spc = &ci->ci_schedstate;
875 ci_rq = spc->spc_sched_info;
877 (*pr)("Run-queue (CPU = %u):\n", ci->ci_index);
878 (*pr)(" pid.lid = %d.%d, r_count = %u, r_avgcount = %u, "
879 "maxpri = %d, mlwp = %p\n",
880 #ifdef MULTIPROCESSOR
881 ci->ci_curlwp->l_proc->p_pid, ci->ci_curlwp->l_lid,
882 #else
883 curlwp->l_proc->p_pid, curlwp->l_lid,
884 #endif
885 ci_rq->r_count, ci_rq->r_avgcount, spc->spc_maxpriority,
886 spc->spc_migrating);
/* Bitmap words are printed from the highest index down to 0. */
887 i = (PRI_COUNT >> BITMAP_SHIFT) - 1;
888 do {
889 uint32_t q;
890 q = ci_rq->r_bitmap[i];
891 (*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q);
892 } while (i--);
895 (*pr)(" %5s %4s %4s %10s %3s %18s %4s %4s %s\n",
896 "LID", "PRI", "EPRI", "FL", "ST", "LWP", "CPU", "TCI", "LRTICKS");
898 PROCLIST_FOREACH(p, &allproc) {
899 if ((p->p_flag & PK_MARKER) != 0)
900 continue;
901 (*pr)(" /- %d (%s)\n", (int)p->p_pid, p->p_comm);
902 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
903 ci = l->l_cpu;
904 tci = l->l_target_cpu;
/* State column: RQ for LSRUN, SQ for LSSLEEP; TCI is -1 without a target CPU. */
905 (*pr)(" | %5d %4u %4u 0x%8.8x %3s %18p %4u %4d %u\n",
906 (int)l->l_lid, l->l_priority, lwp_eprio(l),
907 l->l_flag, l->l_stat == LSRUN ? "RQ" :
908 (l->l_stat == LSSLEEP ? "SQ" : "-"),
909 l, ci->ci_index, (tci ? tci->ci_index : -1),
910 (u_int)(hardclock_ticks - l->l_rticks));
915 #endif