2 * Copyright (c) 1982, 1986, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/sysproto.h>
38 #include <sys/kernel.h>
40 #include <sys/mutex.h>
42 #include <sys/resourcevar.h>
43 #include <sys/sysctl.h>
45 #include <machine/cpu.h>
48 #include <sys/malloc.h>
52 static MALLOC_DEFINE(M_GPROF
, "gprof", "kernel profiling buffer");
/*
 * Calibrate and arm kernel profiling very early in boot, before normal
 * subsystems come up (SI_SUB_KPROF, first in order).
 */
static void kmstartup(void *);
SYSINIT(kmem, SI_SUB_KPROF, SI_ORDER_FIRST, kmstartup, NULL);
57 struct gmonparam _gmonparam
= { GMON_PROF_OFF
};
/*
 * NOTE(review): fragment of the calibration helper — the return type,
 * braces, and loop body are missing from this extraction.  The visible
 * loop runs CALIB_SCALE times while profiled so its hits can be summed.
 */
61 nullfunc_loop_profiled()
65 for (i
= 0; i
< CALIB_SCALE
; i
++)
/*
 * End-of-text marker for the profiled null loop; kmstartup() scans
 * KCOUNT from nullfunc_loop_profiled up to this label.
 */
69 #define nullfunc_loop_profiled_end nullfunc_profiled /* XXX */
78 * Update the histograms to support extending the text region arbitrarily.
79 * This is done slightly naively (no sparse regions), so will waste slight
80 * amounts of memory, but will overall work nicely enough to allow profiling
/*
 * Grow the profiling data structures so they cover kernel text up to
 * 'nhighpc'.  A scratch gmonparam 'np' is sized for the new range, a
 * fresh contiguous buffer is carved into tos/kcount/froms, the old
 * counters are copied across with the new tail zeroed, and the result
 * is copied back over _gmonparam.
 * NOTE(review): many interior lines (return type, braces, locals 'cp',
 * locking, the malloc flags, frees of the old buffers) are missing from
 * this extraction; the code below is byte-preserved as found.
 */
84 kmupetext(uintfptr_t nhighpc
)
86 struct gmonparam np
; /* slightly large */
87 struct gmonparam
*p
= &_gmonparam
;
/* Start from the current parameters. */
91 bcopy(p
, &np
, sizeof(*p
));
/* Round the new high PC up to a whole histogram bucket. */
92 np
.highpc
= ROUNDUP(nhighpc
, HISTFRACTION
* sizeof(HISTCOUNTER
));
/* Nothing to do if the text region did not actually grow (early-out body not visible here). */
93 if (np
.highpc
<= p
->highpc
)
/* Recompute every derived size for the enlarged text range. */
95 np
.textsize
= np
.highpc
- p
->lowpc
;
96 np
.kcountsize
= np
.textsize
/ HISTFRACTION
;
97 np
.hashfraction
= HASHFRACTION
;
98 np
.fromssize
= np
.textsize
/ HASHFRACTION
;
/* Arc limit scales with text size, clamped to [MINARCS, MAXARCS]. */
99 np
.tolimit
= np
.textsize
* ARCDENSITY
/ 100;
100 if (np
.tolimit
< MINARCS
)
101 np
.tolimit
= MINARCS
;
102 else if (np
.tolimit
> MAXARCS
)
103 np
.tolimit
= MAXARCS
;
104 np
.tossize
= np
.tolimit
* sizeof(struct tostruct
);
/* One contiguous allocation backs kcount, froms and tos (flags line missing). */
105 cp
= malloc(np
.kcountsize
+ np
.fromssize
+ np
.tossize
,
108 * Check for something else extending highpc while we slept.
110 if (np
.highpc
<= p
->highpc
) {
/* Carve the buffer into the three regions. */
114 np
.tos
= (struct tostruct
*)cp
;
116 np
.kcount
= (HISTCOUNTER
*)cp
;
118 np
.froms
= (u_short
*)cp
;
120 /* Reinitialize pointers to overhead counters. */
121 np
.cputime_count
= &KCOUNT(&np
, PC_TO_I(&np
, cputime
));
122 np
.mcount_count
= &KCOUNT(&np
, PC_TO_I(&np
, mcount
));
123 np
.mexitcount_count
= &KCOUNT(&np
, PC_TO_I(&np
, mexitcount
));
/* Preserve existing counters; zero only the newly added tail. */
126 bcopy(p
->tos
, np
.tos
, p
->tossize
);
127 bzero((char *)np
.tos
+ p
->tossize
, np
.tossize
- p
->tossize
);
128 bcopy(p
->kcount
, np
.kcount
, p
->kcountsize
);
129 bzero((char *)np
.kcount
+ p
->kcountsize
, np
.kcountsize
-
131 bcopy(p
->froms
, np
.froms
, p
->fromssize
);
132 bzero((char *)np
.froms
+ p
->fromssize
, np
.fromssize
- p
->fromssize
);
/* Install the enlarged parameter block over _gmonparam. */
134 bcopy(&np
, p
, sizeof(*p
));
/*
 * NOTE(review): fragment of kmstartup() — the function header, several
 * declarations (cp, i, tmp_addr, empty_loop_time, mcount_overhead) and
 * various statements are missing from this extraction; the code below
 * is byte-preserved.  It sizes and allocates the gprof buffers for the
 * kernel text [btext, etext), then calibrates the overheads of
 * cputime(), mcount() and mexitcount() with profiling briefly enabled.
 */
144 struct gmonparam
*p
= &_gmonparam
;
146 int cputime_overhead
;
150 int mexitcount_overhead
;
151 int nullfunc_loop_overhead
;
152 int nullfunc_loop_profiled_time
;
157 * Round lowpc and highpc to multiples of the density we're using
158 * so the rest of the scaling (here and in gprof) stays in ints.
160 p
->lowpc
= ROUNDDOWN((u_long
)btext
, HISTFRACTION
* sizeof(HISTCOUNTER
));
161 p
->highpc
= ROUNDUP((u_long
)etext
, HISTFRACTION
* sizeof(HISTCOUNTER
));
162 p
->textsize
= p
->highpc
- p
->lowpc
;
163 printf("Profiling kernel, textsize=%lu [%jx..%jx]\n",
164 p
->textsize
, (uintmax_t)p
->lowpc
, (uintmax_t)p
->highpc
);
165 p
->kcountsize
= p
->textsize
/ HISTFRACTION
;
166 p
->hashfraction
= HASHFRACTION
;
167 p
->fromssize
= p
->textsize
/ HASHFRACTION
;
/* Arc limit scales with text size, clamped to [MINARCS, MAXARCS]. */
168 p
->tolimit
= p
->textsize
* ARCDENSITY
/ 100;
169 if (p
->tolimit
< MINARCS
)
170 p
->tolimit
= MINARCS
;
171 else if (p
->tolimit
> MAXARCS
)
172 p
->tolimit
= MAXARCS
;
173 p
->tossize
= p
->tolimit
* sizeof(struct tostruct
);
/* One zeroed contiguous allocation backs kcount, froms and tos. */
174 cp
= (char *)malloc(p
->kcountsize
+ p
->fromssize
+ p
->tossize
,
175 M_GPROF
, M_WAITOK
| M_ZERO
);
176 p
->tos
= (struct tostruct
*)cp
;
178 p
->kcount
= (HISTCOUNTER
*)cp
;
180 p
->froms
= (u_short
*)cp
;
/* Histogram counter "type" encodes the counter width in bits. */
181 p
->histcounter_type
= FUNCTION_ALIGNMENT
/ HISTFRACTION
* NBBY
;
184 /* Signed counters. */
185 p
->histcounter_type
= -p
->histcounter_type
;
187 /* Initialize pointers to overhead counters. */
188 p
->cputime_count
= &KCOUNT(p
, PC_TO_I(p
, cputime
));
189 p
->mcount_count
= &KCOUNT(p
, PC_TO_I(p
, mcount
));
190 p
->mexitcount_count
= &KCOUNT(p
, PC_TO_I(p
, mexitcount
));
193 * Disable interrupts to avoid interference while we calibrate
199 * Determine overheads.
200 * XXX this needs to be repeated for each useful timer/counter.
202 cputime_overhead
= 0;
204 for (i
= 0; i
< CALIB_SCALE
; i
++)
205 cputime_overhead
+= cputime();
210 empty_loop_time
= cputime();
212 nullfunc_loop_profiled();
215 * Start profiling. There won't be any normal function calls since
216 * interrupts are disabled, but we will call the profiling routines
217 * directly to determine their overheads.
219 p
->state
= GMON_PROF_HIRES
;
222 nullfunc_loop_profiled();
225 for (i
= 0; i
< CALIB_SCALE
; i
++)
226 MCOUNT_OVERHEAD(profil
);
227 mcount_overhead
= KCOUNT(p
, PC_TO_I(p
, profil
));
230 for (i
= 0; i
< CALIB_SCALE
; i
++)
231 MEXITCOUNT_OVERHEAD();
232 MEXITCOUNT_OVERHEAD_GETLABEL(tmp_addr
);
233 mexitcount_overhead
= KCOUNT(p
, PC_TO_I(p
, tmp_addr
));
235 p
->state
= GMON_PROF_OFF
;
/* Sum the hits the profiled null loop collected over its text range. */
240 nullfunc_loop_profiled_time
= 0;
241 for (tmp_addr
= (uintfptr_t
)nullfunc_loop_profiled
;
242 tmp_addr
< (uintfptr_t
)nullfunc_loop_profiled_end
;
243 tmp_addr
+= HISTFRACTION
* sizeof(HISTCOUNTER
))
244 nullfunc_loop_profiled_time
+= KCOUNT(p
, PC_TO_I(p
, tmp_addr
));
/* Scale a CALIB_SCALE-times-accumulated count back to one iteration, with rounding. */
245 #define CALIB_DOSCALE(count) (((count) + CALIB_SCALE / 3) / CALIB_SCALE)
/* Convert a count at rate 'freq' to nanoseconds. */
246 #define c2n(count, freq) ((int)((count) * 1000000000LL / freq))
247 printf("cputime %d, empty_loop %d, nullfunc_loop_profiled %d, mcount %d, mexitcount %d\n",
248 CALIB_DOSCALE(c2n(cputime_overhead
, p
->profrate
)),
249 CALIB_DOSCALE(c2n(empty_loop_time
, p
->profrate
)),
250 CALIB_DOSCALE(c2n(nullfunc_loop_profiled_time
, p
->profrate
)),
251 CALIB_DOSCALE(c2n(mcount_overhead
, p
->profrate
)),
252 CALIB_DOSCALE(c2n(mexitcount_overhead
, p
->profrate
)));
/* Remove the bare-loop cost from each measured overhead. */
253 cputime_overhead
-= empty_loop_time
;
254 mcount_overhead
-= empty_loop_time
;
255 mexitcount_overhead
-= empty_loop_time
;
258 * Profiling overheads are determined by the times between the
260 * MC1: mcount() is called
261 * MC2: cputime() (called from mcount()) latches the timer
262 * MC3: mcount() completes
263 * ME1: mexitcount() is called
264 * ME2: cputime() (called from mexitcount()) latches the timer
265 * ME3: mexitcount() completes.
266 * The times between the events vary slightly depending on instruction
267 * combination and cache misses, etc. Attempt to determine the
268 * minimum times. These can be subtracted from the profiling times
269 * without much risk of reducing the profiling times below what they
270 * would be when profiling is not configured. Abbreviate:
271 * ab = minimum time between MC1 and MC3
272 * a = minimum time between MC1 and MC2
273 * b = minimum time between MC2 and MC3
274 * cd = minimum time between ME1 and ME3
275 * c = minimum time between ME1 and ME2
276 * d = minimum time between ME2 and ME3.
277 * These satisfy the relations:
278 * ab <= mcount_overhead (just measured)
280 * cd <= mexitcount_overhead (just measured)
282 * a + d <= nullfunc_loop_profiled_time (just measured)
283 * a >= 0, b >= 0, c >= 0, d >= 0.
284 * Assume that ab and cd are equal to the minimums.
/* Record the per-event overheads, split pre/post per the analysis above. */
286 p
->cputime_overhead
= CALIB_DOSCALE(cputime_overhead
);
287 p
->mcount_overhead
= CALIB_DOSCALE(mcount_overhead
- cputime_overhead
);
288 p
->mexitcount_overhead
= CALIB_DOSCALE(mexitcount_overhead
290 nullfunc_loop_overhead
= nullfunc_loop_profiled_time
- empty_loop_time
;
291 p
->mexitcount_post_overhead
= CALIB_DOSCALE((mcount_overhead
292 - nullfunc_loop_overhead
)
294 p
->mexitcount_pre_overhead
= p
->mexitcount_overhead
295 + p
->cputime_overhead
296 - p
->mexitcount_post_overhead
;
297 p
->mcount_pre_overhead
= CALIB_DOSCALE(nullfunc_loop_overhead
)
298 - p
->mexitcount_post_overhead
;
299 p
->mcount_post_overhead
= p
->mcount_overhead
300 + p
->cputime_overhead
301 - p
->mcount_pre_overhead
;
303 "Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d nsec\n",
304 c2n(p
->cputime_overhead
, p
->profrate
),
305 c2n(p
->mcount_overhead
, p
->profrate
),
306 c2n(p
->mcount_pre_overhead
, p
->profrate
),
307 c2n(p
->mcount_post_overhead
, p
->profrate
),
308 c2n(p
->cputime_overhead
, p
->profrate
),
309 c2n(p
->mexitcount_overhead
, p
->profrate
),
310 c2n(p
->mexitcount_pre_overhead
, p
->profrate
),
311 c2n(p
->mexitcount_post_overhead
, p
->profrate
));
313 "Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d cycles\n",
314 p
->cputime_overhead
, p
->mcount_overhead
,
315 p
->mcount_pre_overhead
, p
->mcount_post_overhead
,
316 p
->cputime_overhead
, p
->mexitcount_overhead
,
317 p
->mexitcount_pre_overhead
, p
->mexitcount_post_overhead
);
322 * Return kernel profiling information.
/*
 * sysctl handler for the kern.prof tree.
 * NOTE(review): fragment — the switch statement, locals (state, error),
 * most case labels and braces are missing from this extraction; the
 * code below is byte-preserved.  Visible behavior: GPROF_STATE changes
 * start/stop the profiling clock on proc0; the remaining cases export
 * the raw kcount/froms/tos buffers and the gmonparam block.
 */
325 sysctl_kern_prof(SYSCTL_HANDLER_ARGS
)
327 int *name
= (int *) arg1
;
328 u_int namelen
= arg2
;
329 struct gmonparam
*gp
= &_gmonparam
;
333 /* all sysctl names at this level are terminal */
335 return (ENOTDIR
); /* overloaded */
/* Read/modify the requested profiling state. */
340 error
= sysctl_handle_int(oidp
, &state
, 0, req
);
/* State transitions: OFF stops the clock; ON/HIRES (re)arm profiling. */
345 if (state
== GMON_PROF_OFF
) {
348 stopprofclock(&proc0
);
351 } else if (state
== GMON_PROF_ON
) {
352 gp
->state
= GMON_PROF_OFF
;
354 gp
->profrate
= profhz
;
356 startprofclock(&proc0
);
360 } else if (state
== GMON_PROF_HIRES
) {
361 gp
->state
= GMON_PROF_OFF
;
363 stopprofclock(&proc0
);
368 } else if (state
!= gp
->state
)
/* Export the raw profiling buffers for gprof(1). */
372 return (sysctl_handle_opaque(oidp
,
373 gp
->kcount
, gp
->kcountsize
, req
));
375 return (sysctl_handle_opaque(oidp
,
376 gp
->froms
, gp
->fromssize
, req
));
378 return (sysctl_handle_opaque(oidp
,
379 gp
->tos
, gp
->tossize
, req
));
380 case GPROF_GMONPARAM
:
381 return (sysctl_handle_opaque(oidp
, gp
, sizeof *gp
, req
));
388 SYSCTL_NODE(_kern
, KERN_PROF
, prof
, CTLFLAG_RW
, sysctl_kern_prof
, "");
392 * Profiling system call.
394 * The scale factor is a fixed point number with 16 bits of fraction, so that
395 * 1.0 is represented as 0x10000. A scale factor of 0 turns off profiling.
397 #ifndef _SYS_SYSPROTO_H_
/*
 * NOTE(review): fragment of the profil() system call — the function
 * header, the struct profil_args declaration guarded above, return
 * statements, locking, the scale == 0 stop path, and the closing
 * #endif are all missing from this extraction.  The visible code
 * validates the fixed-point scale and records the user's sample window
 * in the per-process profile state.
 */
409 register struct profil_args
*uap
;
/* Reject scales above 1.0 (0x10000 in 16.16 fixed point). */
414 if (uap
->scale
> (1 << 16))
/* Scale of zero turns profiling off (handling not visible here). */
418 if (uap
->scale
== 0) {
/* Point at this process's profile bookkeeping. */
425 upp
= &td
->td_proc
->p_stats
->p_prof
;
/* Cache the user-supplied window: offset, scale, sample buffer, size. */
427 upp
->pr_off
= uap
->offset
;
428 upp
->pr_scale
= uap
->scale
;
429 upp
->pr_base
= uap
->samples
;
430 upp
->pr_size
= uap
->size
;
439 * Scale is a fixed-point number with the binary point 16 bits
440 * into the value, and is <= 1.0. pc is at most 32 bits, so the
441 * intermediate result is at most 48 bits.
/*
 * NOTE(review): the "& ~1" appears to keep the resulting byte offset
 * aligned for a u_short counter (it is later added to pr_base) —
 * confirm against the struct uprof definition.
 */
443 #define PC_TO_INDEX(pc, prof) \
444 ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
445 (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
448 * Collect user-level profiling statistics; called on a profiling tick,
449 * when a process is running in user-mode. This routine may be called
450 * from an interrupt context. We try to update the user profiling buffers
451 * cheaply with fuswintr() and suswintr(). If that fails, we revert to
452 * an AST that will vector us to trap() with a context in which copyin
453 * and copyout will work. Trap will then call addupc_task().
455 * Note that we may (rarely) not get around to the AST soon enough, and
456 * lose profile ticks when the next tick overwrites this one, but in this
457 * case the system is overloaded and the profile is probably already
/*
 * NOTE(review): fragment — return type, braces, and local declarations
 * (prof, addr, i, v) are missing from this extraction; the code below
 * is byte-preserved.
 */
461 addupc_intr(struct thread
*td
, uintfptr_t pc
, u_int ticks
)
470 prof
= &td
->td_proc
->p_stats
->p_prof
;
/* Lock while validating pc against the profiled window. */
471 PROC_SLOCK(td
->td_proc
);
472 if (pc
< prof
->pr_off
||
473 (i
= PC_TO_INDEX(pc
, prof
)) >= prof
->pr_size
) {
474 PROC_SUNLOCK(td
->td_proc
);
475 return; /* out of range; ignore */
478 addr
= prof
->pr_base
+ i
;
479 PROC_SUNLOCK(td
->td_proc
);
/* Cheap interrupt-safe update; on failure defer to an AST -> addupc_task(). */
480 if ((v
= fuswintr(addr
)) == -1 || suswintr(addr
, v
+ ticks
) == -1) {
481 td
->td_profil_addr
= pc
;
482 td
->td_profil_ticks
= ticks
;
483 td
->td_pflags
|= TDP_OWEUPC
;
485 td
->td_flags
|= TDF_ASTPENDING
;
491 * Much like before, but we can afford to take faults here. If the
492 * update fails, we simply turn off profiling.
/*
 * NOTE(review): fragment — return type, braces, locals (prof, addr, i,
 * v), locking, and the error/stop paths are missing from this
 * extraction; the code below is byte-preserved.
 */
495 addupc_task(struct thread
*td
, uintfptr_t pc
, u_int ticks
)
497 struct proc
*p
= td
->td_proc
;
/* Bail out if profiling has been turned off for this process. */
508 if (!(p
->p_flag
& P_PROFIL
)) {
513 prof
= &p
->p_stats
->p_prof
;
/* Ignore samples outside the profiled window. */
515 if (pc
< prof
->pr_off
||
516 (i
= PC_TO_INDEX(pc
, prof
)) >= prof
->pr_size
) {
521 addr
= prof
->pr_base
+ i
;
/* Fault-tolerant read-modify-write of the user-space counter. */
524 if (copyin(addr
, &v
, sizeof(v
)) == 0) {
526 if (copyout(&v
, addr
, sizeof(v
)) == 0) {
/* Last profiling thread out wakes anyone waiting in stopprofclock(). */
535 if (--p
->p_profthreads
== 0) {
536 if (p
->p_flag
& P_STOPPROF
) {
537 wakeup(&p
->p_profthreads
);
546 #if (defined(__amd64__) || defined(__i386__)) && \
547 defined(__GNUCLIKE_CTOR_SECTION_HANDLING)
549 * Support for "--test-coverage --profile-arcs" in GCC.
551 * We need to call all the functions in the .ctor section, in order
552 * to get all the counter-arrays strung into a list.
554 * XXX: the .ctors call __bb_init_func which is located in over in
555 * XXX: i386/i386/support.s for historical reasons. There is probably
556 * XXX: no reason for that to be assembler anymore, but doing it right
557 * XXX: in MI C code requires one to reverse-engineer the type-selection
558 * XXX: inside GCC. Have fun.
560 * XXX: Worrisome perspective: Calling the .ctors may make C++ in the
561 * XXX: kernel feasible. Don't.
/* Pointer-to-constructor type and the linker-provided .ctors bounds. */
563 typedef void (*ctor_t
)(void);
564 extern ctor_t _start_ctors
, _stop_ctors
;
/*
 * NOTE(review): fragment of tcov_init() — its return type, body braces,
 * and the per-constructor invocation inside the loop are missing from
 * this extraction.  The visible loop walks every ctor_t between the
 * linker symbols _start_ctors and _stop_ctors.
 */
567 tcov_init(void *foo __unused
)
571 for (p
= &_start_ctors
; p
< &_stop_ctors
; p
++) {
/* Run the coverage-counter constructors just after profiling setup. */
SYSINIT(tcov_init, SI_SUB_KPROF, SI_ORDER_SECOND, tcov_init, NULL);
580 * GCC contains magic to recognize calls to for instance execve() and
581 * puts in calls to this function to preserve the profile counters.
582 * XXX: Put zinging punchline here.
/* Forward declaration; the definition is outside this view. */
584 void __bb_fork_func(void);