4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
30 #include <sys/errno.h>
32 #include <sys/modctl.h>
34 #include <sys/systm.h>
36 #include <sys/sunddi.h>
37 #include <sys/cpuvar.h>
39 #include <sys/strsubr.h>
40 #include <sys/dtrace.h>
41 #include <sys/cyclic.h>
42 #include <sys/atomic.h>
44 static dev_info_t
*profile_devi
;
45 static dtrace_provider_id_t profile_id
;
48 * Regardless of platform, the stack frames look like this in the case of the
57 * On x86, there are five frames from the generic interrupt code; further, the
58 * interrupted instruction appears as its own stack frame, giving us a total of
61 * On SPARC, the picture is further complicated because the compiler
62 * optimizes away tail-calls -- so the following frames are optimized away:
67 * This gives three frames. However, on DEBUG kernels, the cyclic_expire
68 * frame cannot be tail-call eliminated, yielding four frames in this case.
70 * All of the above constraints lead to the mess below. Yes, the profile
71 * provider should ideally figure this out on-the-fly by hitting one of its own
72 * probes and then walking its own stack trace. This is complicated, however,
73 * and the static definition doesn't seem to be overly brittle. Still, we
74 * allow for a manual override in case we get it completely wrong.
77 #define PROF_ARTIFICIAL_FRAMES 10
81 #define PROF_ARTIFICIAL_FRAMES 4
83 #define PROF_ARTIFICIAL_FRAMES 3
88 #define PROF_NAMELEN 15
90 #define PROF_PROFILE 0
92 #define PROF_PREFIX_PROFILE "profile-"
93 #define PROF_PREFIX_TICK "tick-"
/*
 * Per-probe state for the profile provider.  One of these is allocated in
 * profile_create() and handed to dtrace_probe_create() as the probe's
 * private argument; profile_destroy() frees it.
 */
95 typedef struct profile_probe
{
/* copy of the probe name, filled in by profile_create() */
96 char prof_name
[PROF_NAMELEN
];
/* firing interval; used as the cyclic's cyt_interval */
99 hrtime_t prof_interval
;
/* cyclic backing the probe, or CYCLIC_NONE while the probe is disabled */
100 cyclic_id_t prof_cyclic
;
103 typedef struct profile_probe_percpu
{
104 hrtime_t profc_expected
;
105 hrtime_t profc_interval
;
106 profile_probe_t
*profc_probe
;
107 } profile_probe_percpu_t
;
109 hrtime_t profile_interval_min
= NANOSEC
/ 5000; /* 5000 hz */
110 int profile_aframes
= 0; /* override */
/*
 * Rates used by profile_provide() to name and create the default
 * "profile-<rate>" probes; each probe's interval is NANOSEC / rate.
 * profile_provide() tests each entry against 0 before using it.
 */
112 static int profile_rates
[] = {
113 97, 199, 499, 997, 1999,
/*
 * Rates used by profile_provide() to name and create the default
 * "tick-<rate>" probes; each probe's interval is NANOSEC / rate.
 * profile_provide() tests each entry against 0 before using it.
 */
119 static int profile_ticks
[] = {
120 1, 10, 100, 500, 1000,
126 * profile_max defines the upper bound on the number of profile probes that
127 * can exist (this is to prevent malicious or clumsy users from exhausting
128 * system resources by creating a slew of profile probes). At mod load time,
129 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
130 * present in the profile.conf file.
132 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */
133 static uint32_t profile_max
; /* maximum number of profile probes */
134 static uint32_t profile_total
; /* current number of profile probes */
137 profile_fire(void *arg
)
139 profile_probe_percpu_t
*pcpu
= arg
;
140 profile_probe_t
*prof
= pcpu
->profc_probe
;
143 late
= dtrace_gethrtime() - pcpu
->profc_expected
;
144 pcpu
->profc_expected
+= pcpu
->profc_interval
;
146 dtrace_probe(prof
->prof_id
, CPU
->cpu_profile_pc
,
147 CPU
->cpu_profile_upc
, late
, 0, 0);
151 profile_tick(void *arg
)
153 profile_probe_t
*prof
= arg
;
155 dtrace_probe(prof
->prof_id
, CPU
->cpu_profile_pc
,
156 CPU
->cpu_profile_upc
, 0, 0, 0);
160 profile_create(hrtime_t interval
, const char *name
, int kind
)
162 profile_probe_t
*prof
;
163 int nr_frames
= PROF_ARTIFICIAL_FRAMES
+ dtrace_mach_aframes();
166 nr_frames
= profile_aframes
;
168 if (interval
< profile_interval_min
)
171 if (dtrace_probe_lookup(profile_id
, NULL
, NULL
, name
) != 0)
174 atomic_inc_32(&profile_total
);
175 if (profile_total
> profile_max
) {
176 atomic_dec_32(&profile_total
);
180 prof
= kmem_zalloc(sizeof (profile_probe_t
), KM_SLEEP
);
181 (void) strcpy(prof
->prof_name
, name
);
182 prof
->prof_interval
= interval
;
183 prof
->prof_cyclic
= CYCLIC_NONE
;
184 prof
->prof_kind
= kind
;
185 prof
->prof_id
= dtrace_probe_create(profile_id
,
186 NULL
, NULL
, name
, nr_frames
, prof
);
/*
 * Provider entry point that creates profile provider probes.
 *
 * arg is not referenced by the visible code.  desc is the probe description;
 * its dtpd_name field supplies the requested probe name.
 *
 * Two paths are visible:
 *
 *  - The "no description" path formats a name for every non-zero entry of
 *    profile_rates[] and profile_ticks[] and calls profile_create() with an
 *    interval of NANOSEC / rate.
 *
 *  - The named path matches the name against the known prefixes, parses the
 *    decimal digits that follow the prefix, resolves the optional trailing
 *    time suffix to a multiplier, and calls profile_create() with the
 *    resulting value.
 */
191 profile_provide(void *arg
, const dtrace_probedesc_t
*desc
)
193 int i
, j
, rate
, kind
;
194 hrtime_t val
= 0, mult
= 1, len
;
195 const char *name
, *suffix
= NULL
;
/* Prefix-to-kind table entries: "profile-" and "tick-". */
201 { PROF_PREFIX_PROFILE
, PROF_PROFILE
},
202 { PROF_PREFIX_TICK
, PROF_TICK
},
/*
 * Suffix-to-multiplier table entries.  Each multiplier converts a count of
 * the named unit into nanoseconds.
 */
210 { "ns", NANOSEC
/ NANOSEC
},
211 { "nsec", NANOSEC
/ NANOSEC
},
212 { "us", NANOSEC
/ MICROSEC
},
213 { "usec", NANOSEC
/ MICROSEC
},
214 { "ms", NANOSEC
/ MILLISEC
},
215 { "msec", NANOSEC
/ MILLISEC
},
216 { "s", NANOSEC
/ SEC
},
217 { "sec", NANOSEC
/ SEC
},
218 { "m", NANOSEC
* (hrtime_t
)60 },
219 { "min", NANOSEC
* (hrtime_t
)60 },
220 { "h", NANOSEC
* (hrtime_t
)(60 * 60) },
221 { "hour", NANOSEC
* (hrtime_t
)(60 * 60) },
222 { "d", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
223 { "day", NANOSEC
* (hrtime_t
)(24 * 60 * 60) },
/* Scratch buffer for the generated default probe names. */
229 char n
[PROF_NAMELEN
];
232 * If no description was provided, provide all of our probes.
234 for (i
= 0; i
< sizeof (profile_rates
) / sizeof (int); i
++) {
235 if ((rate
= profile_rates
[i
]) == 0)
238 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
239 PROF_PREFIX_PROFILE
, rate
);
240 profile_create(NANOSEC
/ rate
, n
, PROF_PROFILE
);
243 for (i
= 0; i
< sizeof (profile_ticks
) / sizeof (int); i
++) {
244 if ((rate
= profile_ticks
[i
]) == 0)
247 (void) snprintf(n
, PROF_NAMELEN
, "%s%d",
248 PROF_PREFIX_TICK
, rate
);
249 profile_create(NANOSEC
/ rate
, n
, PROF_TICK
);
255 name
= desc
->dtpd_name
;
/* Find the prefix the requested name starts with; len is its length. */
257 for (i
= 0; types
[i
].prefix
!= NULL
; i
++) {
258 len
= strlen(types
[i
].prefix
);
260 if (strncmp(name
, types
[i
].prefix
, len
) != 0)
265 if (types
[i
].prefix
== NULL
)
268 kind
= types
[i
].kind
;
/*
 * NOTE(review): the value stored in j here is overwritten by the
 * initializer of the loop that follows, so this assignment looks dead.
 */
269 j
= strlen(name
) - len
;
272 * We need to start before any time suffix.
274 for (j
= strlen(name
); j
>= len
; j
--) {
275 if (name
[j
] >= '0' && name
[j
] <= '9')
280 ASSERT(suffix
!= NULL
);
283 * Now determine the numerical value present in the probe name.
/*
 * Digits are consumed from least to most significant; mult holds the place
 * value of the current digit.
 * NOTE(review): nothing visible here bounds the number of digits, so val and
 * mult could overflow hrtime_t for a very long digit string -- confirm.
 */
285 for (; j
>= len
; j
--) {
286 if (name
[j
] < '0' || name
[j
] > '9')
289 val
+= (name
[j
] - '0') * mult
;
290 mult
*= (hrtime_t
)10;
297 * Look-up the suffix to determine the multiplier.
/* The suffix comparison is case-insensitive; mult is reset to 0 first. */
299 for (i
= 0, mult
= 0; suffixes
[i
].name
!= NULL
; i
++) {
300 if (strcasecmp(suffixes
[i
].name
, suffix
) == 0) {
301 mult
= suffixes
[i
].mult
;
/* A non-empty suffix that matched no table entry is checked for here. */
306 if (suffixes
[i
].name
== NULL
&& *suffix
!= '\0')
311 * The default is frequency-per-second.
318 profile_create(val
, name
, kind
);
323 profile_destroy(void *arg
, dtrace_id_t id
, void *parg
)
325 profile_probe_t
*prof
= parg
;
327 ASSERT(prof
->prof_cyclic
== CYCLIC_NONE
);
328 kmem_free(prof
, sizeof (profile_probe_t
));
330 ASSERT(profile_total
>= 1);
331 atomic_dec_32(&profile_total
);
336 profile_online(void *arg
, cpu_t
*cpu
, cyc_handler_t
*hdlr
, cyc_time_t
*when
)
338 profile_probe_t
*prof
= arg
;
339 profile_probe_percpu_t
*pcpu
;
341 pcpu
= kmem_zalloc(sizeof (profile_probe_percpu_t
), KM_SLEEP
);
342 pcpu
->profc_probe
= prof
;
344 hdlr
->cyh_func
= profile_fire
;
345 hdlr
->cyh_arg
= pcpu
;
346 hdlr
->cyh_level
= CY_HIGH_LEVEL
;
348 when
->cyt_interval
= prof
->prof_interval
;
349 when
->cyt_when
= dtrace_gethrtime() + when
->cyt_interval
;
351 pcpu
->profc_expected
= when
->cyt_when
;
352 pcpu
->profc_interval
= when
->cyt_interval
;
357 profile_offline(void *arg
, cpu_t
*cpu
, void *oarg
)
359 profile_probe_percpu_t
*pcpu
= oarg
;
361 ASSERT(pcpu
->profc_probe
== arg
);
362 kmem_free(pcpu
, sizeof (profile_probe_percpu_t
));
/*
 * Provider entry point that enables a probe by installing its cyclic.
 *
 * parg is the probe's profile_probe_t.  The probe must have a non-zero
 * interval and the caller must hold cpu_lock (both are asserted).
 *
 * A PROF_TICK probe gets a single cyclic via cyclic_add(), with profile_tick
 * as the handler at CY_HIGH_LEVEL and a first expiry one interval from now.
 * A PROF_PROFILE probe gets an omnipresent cyclic via cyclic_add_omni(), with
 * profile_online/profile_offline as the per-CPU callbacks.  Either way the
 * resulting cyclic id is stored in prof_cyclic.
 */
367 profile_enable(void *arg
, dtrace_id_t id
, void *parg
)
369 profile_probe_t
*prof
= parg
;
370 cyc_omni_handler_t omni
;
374 ASSERT(prof
->prof_interval
!= 0);
375 ASSERT(MUTEX_HELD(&cpu_lock
));
/* First pass: fill in the handler description for the probe's kind. */
377 if (prof
->prof_kind
== PROF_TICK
) {
378 hdlr
.cyh_func
= profile_tick
;
380 hdlr
.cyh_level
= CY_HIGH_LEVEL
;
382 when
.cyt_interval
= prof
->prof_interval
;
383 when
.cyt_when
= dtrace_gethrtime() + when
.cyt_interval
;
385 ASSERT(prof
->prof_kind
== PROF_PROFILE
);
386 omni
.cyo_online
= profile_online
;
387 omni
.cyo_offline
= profile_offline
;
/* Second pass: create the cyclic and remember its id. */
391 if (prof
->prof_kind
== PROF_TICK
) {
392 prof
->prof_cyclic
= cyclic_add(&hdlr
, &when
);
394 prof
->prof_cyclic
= cyclic_add_omni(&omni
);
401 profile_disable(void *arg
, dtrace_id_t id
, void *parg
)
403 profile_probe_t
*prof
= parg
;
405 ASSERT(prof
->prof_cyclic
!= CYCLIC_NONE
);
406 ASSERT(MUTEX_HELD(&cpu_lock
));
408 cyclic_remove(prof
->prof_cyclic
);
409 prof
->prof_cyclic
= CYCLIC_NONE
;
414 profile_mode(void *arg
, dtrace_id_t id
, void *parg
)
416 profile_probe_t
*prof
= parg
;
419 if (CPU
->cpu_profile_pc
!= 0) {
420 mode
= DTRACE_MODE_KERNEL
;
422 mode
= DTRACE_MODE_USER
;
425 if (prof
->prof_kind
== PROF_TICK
) {
426 mode
|= DTRACE_MODE_NOPRIV_RESTRICT
;
428 ASSERT(prof
->prof_kind
== PROF_PROFILE
);
429 mode
|= DTRACE_MODE_NOPRIV_DROP
;
435 static dtrace_pattr_t profile_attr
= {
436 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
437 { DTRACE_STABILITY_UNSTABLE
, DTRACE_STABILITY_UNSTABLE
, DTRACE_CLASS_UNKNOWN
},
438 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
439 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
440 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_COMMON
},
443 static dtrace_pops_t profile_pops
= {
/*
 * Driver attach entry point.
 *
 * Creates the "profile" character minor node and registers the "profile"
 * provider with DTrace (kernel and user privilege, using profile_attr and
 * profile_pops; the provider id is returned in profile_id).  If either step
 * fails, the minor nodes are removed and DDI_FAILURE is returned.
 *
 * On success, profile_max is loaded from the "profile-max-probes" property,
 * falling back to PROFILE_MAX_DEFAULT, and DDI_SUCCESS is returned.
 */
457 profile_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
463 return (DDI_SUCCESS
);
465 return (DDI_FAILURE
);
/* Both the minor node and the provider registration must succeed. */
468 if (ddi_create_minor_node(devi
, "profile", S_IFCHR
, 0,
469 DDI_PSEUDO
, 0) == DDI_FAILURE
||
470 dtrace_register("profile", &profile_attr
,
471 DTRACE_PRIV_KERNEL
| DTRACE_PRIV_USER
, NULL
,
472 &profile_pops
, NULL
, &profile_id
) != 0) {
473 ddi_remove_minor_node(devi
, NULL
);
474 return (DDI_FAILURE
);
/* Upper bound on the number of probes, overridable via driver property. */
477 profile_max
= ddi_getprop(DDI_DEV_T_ANY
, devi
, DDI_PROP_DONTPASS
,
478 "profile-max-probes", PROFILE_MAX_DEFAULT
);
480 ddi_report_dev(devi
);
482 return (DDI_SUCCESS
);
/*
 * Driver detach entry point.
 *
 * Unregisters the provider from DTrace; if dtrace_unregister() reports a
 * failure the detach fails with DDI_FAILURE and the minor node is left in
 * place.  Otherwise the minor nodes are removed and DDI_SUCCESS is returned.
 */
486 profile_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
)
492 return (DDI_SUCCESS
);
494 return (DDI_FAILURE
);
/* The provider must be unregistered before the node can go away. */
497 if (dtrace_unregister(profile_id
) != 0)
498 return (DDI_FAILURE
);
500 ddi_remove_minor_node(devi
, NULL
);
501 return (DDI_SUCCESS
);
/*
 * Driver getinfo entry point.
 *
 * Dispatches on infocmd.  For DDI_INFO_DEVT2DEVINFO the saved profile_devi
 * pointer is stored through result.  DDI_INFO_DEVT2INSTANCE is also handled.
 */
506 profile_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
511 case DDI_INFO_DEVT2DEVINFO
:
512 *result
= (void *)profile_devi
;
515 case DDI_INFO_DEVT2INSTANCE
:
/*
 * Character device open entry point; installed as the "open" slot of
 * profile_cb_ops.
 */
527 profile_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
532 static struct cb_ops profile_cb_ops
= {
533 profile_open
, /* open */
535 nulldev
, /* strategy */
545 ddi_prop_op
, /* cb_prop_op */
547 D_NEW
| D_MP
/* Driver compatibility flag */
550 static struct dev_ops profile_ops
= {
551 DEVO_REV
, /* devo_rev, */
553 profile_info
, /* get_dev_info */
554 nulldev
, /* identify */
556 profile_attach
, /* attach */
557 profile_detach
, /* detach */
559 &profile_cb_ops
, /* driver operations */
560 NULL
, /* bus operations */
561 nodev
, /* dev power */
562 ddi_quiesce_not_needed
, /* quiesce */
566 * Module linkage information for the kernel.
568 static struct modldrv modldrv
= {
569 &mod_driverops
, /* module type (this is a pseudo driver) */
570 "Profile Interrupt Tracing", /* name of module */
571 &profile_ops
, /* driver ops */
574 static struct modlinkage modlinkage
= {
583 return (mod_install(&modlinkage
));
587 _info(struct modinfo
*modinfop
)
589 return (mod_info(&modlinkage
, modinfop
));
595 return (mod_remove(&modlinkage
));