Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / dtrace / profile.c
blobd1ba9560611d1f1a928881c40f03233c1336275e
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
30 #include <sys/errno.h>
31 #include <sys/stat.h>
32 #include <sys/modctl.h>
33 #include <sys/conf.h>
34 #include <sys/systm.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/cpuvar.h>
38 #include <sys/kmem.h>
39 #include <sys/strsubr.h>
40 #include <sys/dtrace.h>
41 #include <sys/cyclic.h>
42 #include <sys/atomic.h>
44 static dev_info_t *profile_devi;
45 static dtrace_provider_id_t profile_id;
48 * Regardless of platform, the stack frames look like this in the case of the
49 * profile provider:
51 * profile_fire
52 * cyclic_expire
53 * cyclic_fire
54 * [ cbe ]
55 * [ interrupt code ]
57 * On x86, there are five frames from the generic interrupt code; further, the
58 * interrupted instruction appears as its own stack frame, giving us a total of
59 * 10.
61 * On SPARC, the picture is further complicated because the compiler
62 * optimizes away tail-calls -- so the following frames are optimized away:
64 * profile_fire
65 * cyclic_expire
67 * This gives three frames. However, on DEBUG kernels, the cyclic_expire
68 * frame cannot be tail-call eliminated, yielding four frames in this case.
70 * All of the above constraints lead to the mess below. Yes, the profile
71 * provider should ideally figure this out on-the-fly by hitting one of its own
72 * probes and then walking its own stack trace. This is complicated, however,
73 * and the static definition doesn't seem to be overly brittle. Still, we
74 * allow for a manual override in case we get it completely wrong.
/*
 * Number of artificial stack frames contributed by the profile provider
 * itself; left undefined on platforms other than x86 and SPARC.
 */
#if defined(__x86)
#define	PROF_ARTIFICIAL_FRAMES	10
#elif defined(__sparc) && defined(DEBUG)
#define	PROF_ARTIFICIAL_FRAMES	4
#elif defined(__sparc)
#define	PROF_ARTIFICIAL_FRAMES	3
#endif

/* Size in bytes of the prof_name buffer in profile_probe_t. */
#define	PROF_NAMELEN		15

/* Values stored in profile_probe_t's prof_kind. */
#define	PROF_PROFILE		0
#define	PROF_TICK		1

/* Probe-name prefixes that select the probe kind. */
#define	PROF_PREFIX_PROFILE	"profile-"
#define	PROF_PREFIX_TICK	"tick-"
95 typedef struct profile_probe {
96 char prof_name[PROF_NAMELEN];
97 dtrace_id_t prof_id;
98 int prof_kind;
99 hrtime_t prof_interval;
100 cyclic_id_t prof_cyclic;
101 } profile_probe_t;
103 typedef struct profile_probe_percpu {
104 hrtime_t profc_expected;
105 hrtime_t profc_interval;
106 profile_probe_t *profc_probe;
107 } profile_probe_percpu_t;
109 hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */
110 int profile_aframes = 0; /* override */
/*
 * Rates (in hz) of the profile-* probes provided by default.  The table has
 * 20 slots; the slots not listed here are zero, and profile_provide() skips
 * zero entries.
 */
static int profile_rates[20] = {
	97, 199, 499, 997, 1999, 4001, 4999
};
/*
 * Rates (in hz) of the tick-* probes provided by default.  The table has
 * 15 slots; the slots not listed here are zero, and profile_provide() skips
 * zero entries.
 */
static int profile_ticks[15] = {
	1, 10, 100, 500, 1000, 5000
};
126 * profile_max defines the upper bound on the number of profile probes that
127 * can exist (this is to prevent malicious or clumsy users from exhausting
128 * system resources by creating a slew of profile probes). At mod load time,
129 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
130 * present in the profile.conf file.
/* Default for profile_max when profile-max-probes is not configured. */
#define	PROFILE_MAX_DEFAULT	1000

/* Maximum number of profile probes; assigned in profile_attach(). */
static uint32_t profile_max;

/* Number of profile probes that currently exist. */
static uint32_t profile_total;
136 static void
137 profile_fire(void *arg)
139 profile_probe_percpu_t *pcpu = arg;
140 profile_probe_t *prof = pcpu->profc_probe;
141 hrtime_t late;
143 late = dtrace_gethrtime() - pcpu->profc_expected;
144 pcpu->profc_expected += pcpu->profc_interval;
146 dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
147 CPU->cpu_profile_upc, late, 0, 0);
150 static void
151 profile_tick(void *arg)
153 profile_probe_t *prof = arg;
155 dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
156 CPU->cpu_profile_upc, 0, 0, 0);
159 static void
160 profile_create(hrtime_t interval, const char *name, int kind)
162 profile_probe_t *prof;
163 int nr_frames = PROF_ARTIFICIAL_FRAMES + dtrace_mach_aframes();
165 if (profile_aframes)
166 nr_frames = profile_aframes;
168 if (interval < profile_interval_min)
169 return;
171 if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
172 return;
174 atomic_inc_32(&profile_total);
175 if (profile_total > profile_max) {
176 atomic_dec_32(&profile_total);
177 return;
180 prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
181 (void) strcpy(prof->prof_name, name);
182 prof->prof_interval = interval;
183 prof->prof_cyclic = CYCLIC_NONE;
184 prof->prof_kind = kind;
185 prof->prof_id = dtrace_probe_create(profile_id,
186 NULL, NULL, name, nr_frames, prof);
189 /*ARGSUSED*/
190 static void
191 profile_provide(void *arg, const dtrace_probedesc_t *desc)
193 int i, j, rate, kind;
194 hrtime_t val = 0, mult = 1, len;
195 const char *name, *suffix = NULL;
197 const struct {
198 char *prefix;
199 int kind;
200 } types[] = {
201 { PROF_PREFIX_PROFILE, PROF_PROFILE },
202 { PROF_PREFIX_TICK, PROF_TICK },
203 { NULL, 0}
206 const struct {
207 char *name;
208 hrtime_t mult;
209 } suffixes[] = {
210 { "ns", NANOSEC / NANOSEC },
211 { "nsec", NANOSEC / NANOSEC },
212 { "us", NANOSEC / MICROSEC },
213 { "usec", NANOSEC / MICROSEC },
214 { "ms", NANOSEC / MILLISEC },
215 { "msec", NANOSEC / MILLISEC },
216 { "s", NANOSEC / SEC },
217 { "sec", NANOSEC / SEC },
218 { "m", NANOSEC * (hrtime_t)60 },
219 { "min", NANOSEC * (hrtime_t)60 },
220 { "h", NANOSEC * (hrtime_t)(60 * 60) },
221 { "hour", NANOSEC * (hrtime_t)(60 * 60) },
222 { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) },
223 { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) },
224 { "hz", 0 },
225 { NULL }
228 if (desc == NULL) {
229 char n[PROF_NAMELEN];
232 * If no description was provided, provide all of our probes.
234 for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
235 if ((rate = profile_rates[i]) == 0)
236 continue;
238 (void) snprintf(n, PROF_NAMELEN, "%s%d",
239 PROF_PREFIX_PROFILE, rate);
240 profile_create(NANOSEC / rate, n, PROF_PROFILE);
243 for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
244 if ((rate = profile_ticks[i]) == 0)
245 continue;
247 (void) snprintf(n, PROF_NAMELEN, "%s%d",
248 PROF_PREFIX_TICK, rate);
249 profile_create(NANOSEC / rate, n, PROF_TICK);
252 return;
255 name = desc->dtpd_name;
257 for (i = 0; types[i].prefix != NULL; i++) {
258 len = strlen(types[i].prefix);
260 if (strncmp(name, types[i].prefix, len) != 0)
261 continue;
262 break;
265 if (types[i].prefix == NULL)
266 return;
268 kind = types[i].kind;
269 j = strlen(name) - len;
272 * We need to start before any time suffix.
274 for (j = strlen(name); j >= len; j--) {
275 if (name[j] >= '0' && name[j] <= '9')
276 break;
277 suffix = &name[j];
280 ASSERT(suffix != NULL);
283 * Now determine the numerical value present in the probe name.
285 for (; j >= len; j--) {
286 if (name[j] < '0' || name[j] > '9')
287 return;
289 val += (name[j] - '0') * mult;
290 mult *= (hrtime_t)10;
293 if (val == 0)
294 return;
297 * Look-up the suffix to determine the multiplier.
299 for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
300 if (strcasecmp(suffixes[i].name, suffix) == 0) {
301 mult = suffixes[i].mult;
302 break;
306 if (suffixes[i].name == NULL && *suffix != '\0')
307 return;
309 if (mult == 0) {
311 * The default is frequency-per-second.
313 val = NANOSEC / val;
314 } else {
315 val *= mult;
318 profile_create(val, name, kind);
321 /*ARGSUSED*/
322 static void
323 profile_destroy(void *arg, dtrace_id_t id, void *parg)
325 profile_probe_t *prof = parg;
327 ASSERT(prof->prof_cyclic == CYCLIC_NONE);
328 kmem_free(prof, sizeof (profile_probe_t));
330 ASSERT(profile_total >= 1);
331 atomic_dec_32(&profile_total);
334 /*ARGSUSED*/
335 static void
336 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
338 profile_probe_t *prof = arg;
339 profile_probe_percpu_t *pcpu;
341 pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
342 pcpu->profc_probe = prof;
344 hdlr->cyh_func = profile_fire;
345 hdlr->cyh_arg = pcpu;
346 hdlr->cyh_level = CY_HIGH_LEVEL;
348 when->cyt_interval = prof->prof_interval;
349 when->cyt_when = dtrace_gethrtime() + when->cyt_interval;
351 pcpu->profc_expected = when->cyt_when;
352 pcpu->profc_interval = when->cyt_interval;
355 /*ARGSUSED*/
356 static void
357 profile_offline(void *arg, cpu_t *cpu, void *oarg)
359 profile_probe_percpu_t *pcpu = oarg;
361 ASSERT(pcpu->profc_probe == arg);
362 kmem_free(pcpu, sizeof (profile_probe_percpu_t));
365 /*ARGSUSED*/
366 static int
367 profile_enable(void *arg, dtrace_id_t id, void *parg)
369 profile_probe_t *prof = parg;
370 cyc_omni_handler_t omni;
371 cyc_handler_t hdlr;
372 cyc_time_t when;
374 ASSERT(prof->prof_interval != 0);
375 ASSERT(MUTEX_HELD(&cpu_lock));
377 if (prof->prof_kind == PROF_TICK) {
378 hdlr.cyh_func = profile_tick;
379 hdlr.cyh_arg = prof;
380 hdlr.cyh_level = CY_HIGH_LEVEL;
382 when.cyt_interval = prof->prof_interval;
383 when.cyt_when = dtrace_gethrtime() + when.cyt_interval;
384 } else {
385 ASSERT(prof->prof_kind == PROF_PROFILE);
386 omni.cyo_online = profile_online;
387 omni.cyo_offline = profile_offline;
388 omni.cyo_arg = prof;
391 if (prof->prof_kind == PROF_TICK) {
392 prof->prof_cyclic = cyclic_add(&hdlr, &when);
393 } else {
394 prof->prof_cyclic = cyclic_add_omni(&omni);
396 return (0);
399 /*ARGSUSED*/
400 static void
401 profile_disable(void *arg, dtrace_id_t id, void *parg)
403 profile_probe_t *prof = parg;
405 ASSERT(prof->prof_cyclic != CYCLIC_NONE);
406 ASSERT(MUTEX_HELD(&cpu_lock));
408 cyclic_remove(prof->prof_cyclic);
409 prof->prof_cyclic = CYCLIC_NONE;
412 /*ARGSUSED*/
413 static int
414 profile_mode(void *arg, dtrace_id_t id, void *parg)
416 profile_probe_t *prof = parg;
417 int mode;
419 if (CPU->cpu_profile_pc != 0) {
420 mode = DTRACE_MODE_KERNEL;
421 } else {
422 mode = DTRACE_MODE_USER;
425 if (prof->prof_kind == PROF_TICK) {
426 mode |= DTRACE_MODE_NOPRIV_RESTRICT;
427 } else {
428 ASSERT(prof->prof_kind == PROF_PROFILE);
429 mode |= DTRACE_MODE_NOPRIV_DROP;
432 return (mode);
435 static dtrace_pattr_t profile_attr = {
436 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
437 { DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN },
438 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
439 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
440 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
443 static dtrace_pops_t profile_pops = {
444 profile_provide,
445 NULL,
446 profile_enable,
447 profile_disable,
448 NULL,
449 NULL,
450 NULL,
451 NULL,
452 profile_mode,
453 profile_destroy
456 static int
457 profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
459 switch (cmd) {
460 case DDI_ATTACH:
461 break;
462 case DDI_RESUME:
463 return (DDI_SUCCESS);
464 default:
465 return (DDI_FAILURE);
468 if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0,
469 DDI_PSEUDO, 0) == DDI_FAILURE ||
470 dtrace_register("profile", &profile_attr,
471 DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL,
472 &profile_pops, NULL, &profile_id) != 0) {
473 ddi_remove_minor_node(devi, NULL);
474 return (DDI_FAILURE);
477 profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
478 "profile-max-probes", PROFILE_MAX_DEFAULT);
480 ddi_report_dev(devi);
481 profile_devi = devi;
482 return (DDI_SUCCESS);
485 static int
486 profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
488 switch (cmd) {
489 case DDI_DETACH:
490 break;
491 case DDI_SUSPEND:
492 return (DDI_SUCCESS);
493 default:
494 return (DDI_FAILURE);
497 if (dtrace_unregister(profile_id) != 0)
498 return (DDI_FAILURE);
500 ddi_remove_minor_node(devi, NULL);
501 return (DDI_SUCCESS);
504 /*ARGSUSED*/
505 static int
506 profile_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
508 int error;
510 switch (infocmd) {
511 case DDI_INFO_DEVT2DEVINFO:
512 *result = (void *)profile_devi;
513 error = DDI_SUCCESS;
514 break;
515 case DDI_INFO_DEVT2INSTANCE:
516 *result = NULL;
517 error = DDI_SUCCESS;
518 break;
519 default:
520 error = DDI_FAILURE;
522 return (error);
525 /*ARGSUSED*/
526 static int
527 profile_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
529 return (0);
532 static struct cb_ops profile_cb_ops = {
533 profile_open, /* open */
534 nodev, /* close */
535 nulldev, /* strategy */
536 nulldev, /* print */
537 nodev, /* dump */
538 nodev, /* read */
539 nodev, /* write */
540 nodev, /* ioctl */
541 nodev, /* devmap */
542 nodev, /* mmap */
543 nodev, /* segmap */
544 nochpoll, /* poll */
545 ddi_prop_op, /* cb_prop_op */
546 0, /* streamtab */
547 D_NEW | D_MP /* Driver compatibility flag */
550 static struct dev_ops profile_ops = {
551 DEVO_REV, /* devo_rev, */
552 0, /* refcnt */
553 profile_info, /* get_dev_info */
554 nulldev, /* identify */
555 nulldev, /* probe */
556 profile_attach, /* attach */
557 profile_detach, /* detach */
558 nodev, /* reset */
559 &profile_cb_ops, /* driver operations */
560 NULL, /* bus operations */
561 nodev, /* dev power */
562 ddi_quiesce_not_needed, /* quiesce */
566 * Module linkage information for the kernel.
568 static struct modldrv modldrv = {
569 &mod_driverops, /* module type (this is a pseudo driver) */
570 "Profile Interrupt Tracing", /* name of module */
571 &profile_ops, /* driver ops */
574 static struct modlinkage modlinkage = {
575 MODREV_1,
576 (void *)&modldrv,
577 NULL
581 _init(void)
583 return (mod_install(&modlinkage));
587 _info(struct modinfo *modinfop)
589 return (mod_info(&modlinkage, modinfop));
593 _fini(void)
595 return (mod_remove(&modlinkage));