1 /* $NetBSD: kern_clock.c,v 1.125 2008/07/02 19:38:37 rmind Exp $ */
4 * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 * This code is derived from software contributed to The NetBSD Foundation
11 * by Charles M. Hannum.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
36 * Copyright (c) 1982, 1986, 1991, 1993
37 * The Regents of the University of California. All rights reserved.
38 * (c) UNIX System Laboratories, Inc.
39 * All or some portions of this file are derived from material licensed
40 * to the University of California by American Telephone and Telegraph
41 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 * the permission of UNIX System Laboratories, Inc.
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.125 2008/07/02 19:38:37 rmind Exp $");
75 #include "opt_perfctrs.h"
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/callout.h>
80 #include <sys/kernel.h>
82 #include <sys/resourcevar.h>
83 #include <sys/signalvar.h>
84 #include <sys/sysctl.h>
85 #include <sys/timex.h>
86 #include <sys/sched.h>
88 #include <sys/timetc.h>
90 #include <sys/atomic.h>
92 #include <uvm/uvm_extern.h>
99 * Clock handling routines.
101 * This code is written to operate with two timers that run independently of
102 * each other. The main clock, running hz times per second, is used to keep
103 * track of real time. The second timer handles kernel and user profiling,
104 * and does resource use estimation. If the second timer is programmable,
105 * it is randomized to avoid aliasing between the two clocks. For example,
106 * the randomization prevents an adversary from always giving up the CPU
107 * just before its quantum expires. Otherwise, it would never accumulate
108 * CPU ticks. The mean frequency of the second timer is stathz.
110 * If no second timer exists, stathz will be zero; in this case we drive
111 * profiling and statistics off the main clock. This WILL NOT be accurate;
112 * do not do it unless absolutely necessary.
114 * The statistics clock may (or may not) be run at a higher rate while
115 * profiling. This profile clock runs at profhz. We require that profhz
116 * be an integral multiple of stathz.
118 * If the statistics clock is running fast, it must be divided by the ratio
119 * profhz/stathz for statistics. (For profiling, every tick counts.)
/*
 * Clock divider state for this file.  hardscheddiv and psdiv are private;
 * psratio has external linkage.
 */
static int hardscheddiv;	/* hard => sched divider (used if schedhz == 0); initialized to hz / 16 */
static int psdiv;		/* prof => stat divider; statclock reloads spc_pscnt from it */
int psratio;			/* ratio: prof / stat (profhz over stathz, or over hz if stathz is 0) */
132 static u_int
get_intr_timecount(struct timecounter
*);
134 static struct timecounter intr_timecounter
= {
135 get_intr_timecount
, /* get_timecount */
137 ~0u, /* counter_mask */
139 "clockinterrupt", /* name */
140 0, /* quality - minimum implementation level for a clock */
146 get_intr_timecount(struct timecounter
*tc
)
149 return (u_int
)hardclock_ticks
;
153 * Initialize clock frequencies and start both clocks running.
161 * Set divisors to 1 (normal case) and let the machine-specific
166 * provide minimum default time counter
167 * will only run at interrupt resolution
169 intr_timecounter
.tc_frequency
= hz
;
170 tc_init(&intr_timecounter
);
174 * Compute profhz and stathz, fix profhz if needed.
176 i
= stathz
? stathz
: hz
;
179 psratio
= profhz
/ i
;
182 hardscheddiv
= hz
/ 16;
183 if (hardscheddiv
<= 0)
184 panic("hardscheddiv");
190 * The real-time timer, interrupting hz times per second.
193 hardclock(struct clockframe
*frame
)
199 l
= ci
->ci_data
.cpu_onproc
;
201 timer_tick(l
, CLKF_USERMODE(frame
));
204 * If no separate statistics clock is available, run it from here.
209 * If no separate schedclock is provided, call it here
213 if ((int)(--ci
->ci_schedstate
.spc_schedticks
) <= 0) {
215 ci
->ci_schedstate
.spc_schedticks
= hardscheddiv
;
218 if ((--ci
->ci_schedstate
.spc_ticks
) <= 0)
221 if (CPU_IS_PRIMARY(ci
)) {
227 * Update real-time timeout queue.
233 * Start profiling on a process.
235 * Kernel profiling passes proc0 which never exits and hence
236 * keeps the profile clock running constantly.
239 startprofclock(struct proc
*p
)
242 KASSERT(mutex_owned(&p
->p_stmutex
));
244 if ((p
->p_stflag
& PST_PROFIL
) == 0) {
245 p
->p_stflag
|= PST_PROFIL
;
247 * This is only necessary if using the clock as the
250 if (++profprocs
== 1 && stathz
!= 0)
256 * Stop profiling on a process.
259 stopprofclock(struct proc
*p
)
262 KASSERT(mutex_owned(&p
->p_stmutex
));
264 if (p
->p_stflag
& PST_PROFIL
) {
265 p
->p_stflag
&= ~PST_PROFIL
;
267 * This is only necessary if using the clock as the
270 if (--profprocs
== 0 && stathz
!= 0)
275 #if defined(PERFCTRS)
277 * Independent profiling "tick" in case we're using a separate
278 * clock or profiling event source. Currently, that's just
279 * performance counters--hence the wrapper.
282 proftick(struct clockframe
*frame
)
291 l
= curcpu()->ci_data
.cpu_onproc
;
292 p
= (l
? l
->l_proc
: NULL
);
293 if (CLKF_USERMODE(frame
)) {
294 mutex_spin_enter(&p
->p_stmutex
);
295 if (p
->p_stflag
& PST_PROFIL
)
296 addupc_intr(l
, CLKF_PC(frame
));
297 mutex_spin_exit(&p
->p_stmutex
);
301 if (g
->state
== GMON_PROF_ON
) {
302 i
= CLKF_PC(frame
) - g
->lowpc
;
303 if (i
< g
->textsize
) {
304 i
/= HISTFRACTION
* sizeof(*g
->kcount
);
310 if (p
!= NULL
&& (p
->p_stflag
& PST_PROFIL
) != 0)
311 addupc_intr(l
, LWP_PC(l
));
318 schedclock(struct lwp
*l
)
324 /* Accumulate syscall and context switch counts. */
325 atomic_add_int((unsigned *)&uvmexp
.swtch
, ci
->ci_data
.cpu_nswtch
);
326 ci
->ci_data
.cpu_nswtch
= 0;
327 atomic_add_int((unsigned *)&uvmexp
.syscalls
, ci
->ci_data
.cpu_nsyscall
);
328 ci
->ci_data
.cpu_nsyscall
= 0;
329 atomic_add_int((unsigned *)&uvmexp
.traps
, ci
->ci_data
.cpu_ntrap
);
330 ci
->ci_data
.cpu_ntrap
= 0;
332 if ((l
->l_flag
& LW_IDLE
) != 0)
339 * Statistics clock. Grab profile sample, and if divider reaches 0,
340 * do process and kernel statistics.
343 statclock(struct clockframe
*frame
)
349 struct cpu_info
*ci
= curcpu();
350 struct schedstate_percpu
*spc
= &ci
->ci_schedstate
;
355 * Notice changes in divisor frequency, and adjust clock
356 * frequency accordingly.
358 if (spc
->spc_psdiv
!= psdiv
) {
359 spc
->spc_psdiv
= psdiv
;
360 spc
->spc_pscnt
= psdiv
;
362 setstatclockrate(stathz
);
364 setstatclockrate(profhz
);
367 l
= ci
->ci_data
.cpu_onproc
;
368 if ((l
->l_flag
& LW_IDLE
) != 0) {
370 * don't account idle lwps as swapper.
375 mutex_spin_enter(&p
->p_stmutex
);
378 if (CLKF_USERMODE(frame
)) {
379 if ((p
->p_stflag
& PST_PROFIL
) && profsrc
== PROFSRC_CLOCK
)
380 addupc_intr(l
, CLKF_PC(frame
));
381 if (--spc
->spc_pscnt
> 0) {
382 mutex_spin_exit(&p
->p_stmutex
);
387 * Came from user mode; CPU was in user state.
388 * If this process is being profiled record the tick.
391 if (p
->p_nice
> NZERO
)
392 spc
->spc_cp_time
[CP_NICE
]++;
394 spc
->spc_cp_time
[CP_USER
]++;
398 * Kernel statistics are just like addupc_intr, only easier.
401 if (profsrc
== PROFSRC_CLOCK
&& g
->state
== GMON_PROF_ON
) {
402 i
= CLKF_PC(frame
) - g
->lowpc
;
403 if (i
< g
->textsize
) {
404 i
/= HISTFRACTION
* sizeof(*g
->kcount
);
410 if (p
!= NULL
&& profsrc
== PROFSRC_CLOCK
&&
411 (p
->p_stflag
& PST_PROFIL
)) {
412 addupc_intr(l
, LWP_PC(l
));
415 if (--spc
->spc_pscnt
> 0) {
417 mutex_spin_exit(&p
->p_stmutex
);
421 * Came from kernel mode, so we were:
422 * - handling an interrupt,
423 * - doing syscall or trap work on behalf of the current
425 * - spinning in the idle loop.
426 * Whichever it is, charge the time as appropriate.
427 * Note that we charge interrupts to the current process,
428 * regardless of whether they are ``for'' that process,
429 * so that we know how much of its real time was spent
430 * in ``non-process'' (i.e., interrupt) work.
432 if (CLKF_INTR(frame
) || (curlwp
->l_pflag
& LP_INTR
) != 0) {
436 spc
->spc_cp_time
[CP_INTR
]++;
437 } else if (p
!= NULL
) {
439 spc
->spc_cp_time
[CP_SYS
]++;
441 spc
->spc_cp_time
[CP_IDLE
]++;
444 spc
->spc_pscnt
= psdiv
;
447 atomic_inc_uint(&l
->l_cpticks
);
448 mutex_spin_exit(&p
->p_stmutex
);