usr/src/uts/common/dtrace/dcpc.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/kcpc.h>
#include <sys/cap_util.h>
#include <sys/cpc_pcbe.h>
#include <sys/cpc_impl.h>
#include <sys/dtrace_impl.h>

/*
 * DTrace CPU Performance Counter Provider
 * ---------------------------------------
 *
 * The DTrace cpc provider allows DTrace consumers to access the CPU
 * performance counter overflow mechanism of a CPU. The configuration
 * presented in a probe specification is programmed into the performance
 * counter hardware of all available CPUs on a system. Programming the
 * hardware causes a counter on each CPU to begin counting events of the
 * given type. When the specified number of events have occurred, an overflow
 * interrupt will be generated and the probe is fired.
 *
 * The required configuration for the performance counter is encoded into
 * the probe specification and this includes the performance counter event
 * name, processor mode, overflow rate and an optional unit mask.
 *
 * Most processors provide several counters (PICs) which can count all or a
 * subset of the events available for a given CPU. However, when overflow
 * profiling is being used, not all CPUs can detect which counter generated
 * the overflow interrupt. In this case we cannot reliably determine which
 * counter overflowed and we therefore only allow such CPUs to configure one
 * event at a time. Processors that can determine the counter which
 * overflowed are allowed to program as many events at one time as possible
 * (in theory up to the number of instrumentation counters supported by that
 * platform). Therefore, multiple consumers can enable multiple probes at the
 * same time on such platforms. Platforms which cannot determine the source
 * of an overflow interrupt are only allowed to program a single event at one
 * time.
 *
 * The performance counter hardware is made available to consumers on a
 * first-come, first-served basis. Only a finite amount of hardware resource
 * is available and, while we make every attempt to accommodate requests from
 * consumers, we must deny requests when hardware resources have been
 * exhausted. A consumer will fail to enable probes when resources are
 * currently in use.
 *
 * The cpc provider contends for shared hardware resources along with other
 * consumers of the kernel CPU performance counter subsystem (e.g.
 * cpustat(8)). Only one such consumer can use the performance counters at
 * any one time and counters are made available on a first-come, first-served
 * basis. As with cpustat, the cpc provider has priority over per-LWP libcpc
 * usage (e.g. cputrack(1)). Invoking the cpc provider will cause all
 * existing per-LWP counter contexts to be invalidated.
 */
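
/*
 * As an illustration only (this example is not part of the original file),
 * a consumer could enable a probe such as:
 *
 *	cpc:::PAPI_tot_ins-all-10000
 *
 * which requests a firing on each CPU for every 10000 instructions retired
 * in either user or kernel mode, using the generic PAPI_tot_ins event.
 */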

typedef struct dcpc_probe {
	char		dcpc_event_name[CPC_MAX_EVENT_LEN];
	int		dcpc_flag;	/* flags (USER/SYS) */
	uint32_t	dcpc_ovfval;	/* overflow value */
	int64_t		dcpc_umask;	/* umask/emask for this event */
	int		dcpc_picno;	/* pic this event is programmed in */
	int		dcpc_enabled;	/* probe is actually enabled? */
	int		dcpc_disabling;	/* probe is currently being disabled */
	dtrace_id_t	dcpc_id;	/* probeid this request is enabling */
	int		dcpc_actv_req_idx;	/* idx into dcpc_actv_reqs[] */
} dcpc_probe_t;

static dev_info_t		*dcpc_devi;
static dtrace_provider_id_t	dcpc_pid;
static dcpc_probe_t		**dcpc_actv_reqs;
static uint32_t			dcpc_enablings = 0;
static int			dcpc_ovf_mask = 0;
static int			dcpc_mult_ovf_cap = 0;
static int			dcpc_mask_type = 0;

/*
 * When the dcpc provider is loaded, dcpc_min_overflow is set to either
 * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in
 * the dcpc.conf file. Decrease this value to set probes with smaller
 * overflow values. Remember that very small values could render a system
 * unusable with frequently occurring events.
 */
#define	DCPC_MIN_OVF_DEFAULT	5000
static uint32_t			dcpc_min_overflow;
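
/*
 * For illustration only (a hypothetical setting, not part of the original
 * file): the floor could be lowered with a driver property such as
 *
 *	dcpc-min-overflow=1000;
 *
 * in dcpc.conf, bearing in mind the warning above about very small overflow
 * values.
 */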

static int dcpc_aframes = 0;	/* override for artificial frame setting */
#if defined(__x86)
#define	DCPC_ARTIFICIAL_FRAMES	8
#elif defined(__sparc)
#define	DCPC_ARTIFICIAL_FRAMES	2
#endif
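
/*
 * Note added for clarity: DCPC_ARTIFICIAL_FRAMES is the number of kernel
 * stack frames introduced by the overflow interrupt handling path itself;
 * passing it to dtrace_probe_create() lets actions such as stack() skip
 * those frames and begin at the interrupted code. A non-zero dcpc_aframes
 * (settable by an administrator, e.g. via /etc/system) overrides the
 * platform default.
 */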

/*
 * Called from the platform overflow interrupt handler. 'bitmap' is a mask
 * which contains the pic(s) that have overflowed.
 */
static void
dcpc_fire(uint64_t bitmap)
{
	int i;

	/*
	 * No counter was marked as overflowing. Shout about it and get out.
	 */
	if ((bitmap & dcpc_ovf_mask) == 0) {
		cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n");
		return;
	}

	/*
	 * This is the common case of a processor that doesn't support
	 * multiple overflow events. Such systems are only allowed a single
	 * enabling and therefore we just look for the first entry in
	 * the active request array.
	 */
	if (!dcpc_mult_ovf_cap) {
		for (i = 0; i < cpc_ncounters; i++) {
			if (dcpc_actv_reqs[i] != NULL) {
				dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
				    CPU->cpu_cpcprofile_pc,
				    CPU->cpu_cpcprofile_upc, 0, 0, 0);
				return;
			}
		}
		return;
	}

	/*
	 * This is a processor capable of handling multiple overflow events.
	 * Iterate over the array of active requests and locate the counters
	 * that overflowed (note: it is possible for more than one counter to
	 * have overflowed at the same time).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL &&
		    (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) {
			dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
			    CPU->cpu_cpcprofile_pc,
			    CPU->cpu_cpcprofile_upc, 0, 0, 0);
		}
	}
}

static void
dcpc_create_probe(dtrace_provider_id_t id, const char *probename,
    char *eventname, int64_t umask, uint32_t ovfval, char flag)
{
	dcpc_probe_t *pp;
	int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes();

	if (dcpc_aframes)
		nr_frames = dcpc_aframes;

	if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0)
		return;

	pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP);
	(void) strncpy(pp->dcpc_event_name, eventname,
	    sizeof (pp->dcpc_event_name) - 1);
	pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0';
	pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT;
	pp->dcpc_ovfval = ovfval;
	pp->dcpc_umask = umask;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename,
	    nr_frames, pp);
}

/*ARGSUSED*/
static void
dcpc_provide(void *arg, const dtrace_probedesc_t *desc)
{
	/*
	 * The format of a probe is:
	 *
	 *	event_name-mode-{optional_umask}-overflow_rate
	 * e.g.
	 *	DC_refill_from_system-user-0x1e-50000, or,
	 *	DC_refill_from_system-all-10000
	 *
	 */
	char *str, *end, *p;
	int i, flag = 0;
	char event[CPC_MAX_EVENT_LEN];
	long umask = -1, val = 0;
	size_t evlen, len;

	/*
	 * The 'cpc' provider offers no probes by default.
	 */
	if (desc == NULL)
		return;

	len = strlen(desc->dtpd_name);
	p = str = kmem_alloc(len + 1, KM_SLEEP);
	(void) strcpy(str, desc->dtpd_name);

	/*
	 * We have a poor man's strtok() going on here. Replace any hyphens
	 * in the probe name with NULL characters in order to make it
	 * easy to parse the string with regular string functions.
	 */
	for (i = 0; i < len; i++) {
		if (str[i] == '-')
			str[i] = '\0';
	}

	/*
	 * The first part of the string must be either a platform event
	 * name or a generic event name.
	 */
	evlen = strlen(p);
	(void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1);
	event[CPC_MAX_EVENT_LEN - 1] = '\0';

	/*
	 * The next part of the name is the mode specification. Valid
	 * settings are "user", "kernel" or "all".
	 */
	p += evlen + 1;

	if (strcmp(p, "user") == 0)
		flag |= CPC_COUNT_USER;
	else if (strcmp(p, "kernel") == 0)
		flag |= CPC_COUNT_SYSTEM;
	else if (strcmp(p, "all") == 0)
		flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM;
	else
		goto err;

	/*
	 * Next we either have a mask specification followed by an overflow
	 * rate or just an overflow rate on its own.
	 */
	p += strlen(p) + 1;
	if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
		/*
		 * A unit mask can only be specified if:
		 * 1) this performance counter back end supports masks.
		 * 2) the specified event is platform specific.
		 * 3) a valid hex number is converted.
		 * 4) no extraneous characters follow the mask specification.
		 */
		if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 &&
		    ddi_strtol(p, &end, 16, &umask) == 0 &&
		    end == p + strlen(p)) {
			p += strlen(p) + 1;
		} else {
			goto err;
		}
	}

	/*
	 * This final part must be an overflow value which has to be greater
	 * than the minimum permissible overflow rate.
	 */
	if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) ||
	    val < dcpc_min_overflow)
		goto err;

	/*
	 * Validate the event and create the probe.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		char *events, *cp, *p, *end;
		int found = 0, j;
		size_t llen;

		if ((events = kcpc_list_events(i)) == NULL)
			goto err;

		llen = strlen(events);
		p = cp = ddi_strdup(events, KM_NOSLEEP);
		end = cp + llen;

		for (j = 0; j < llen; j++) {
			if (cp[j] == ',')
				cp[j] = '\0';
		}

		while (p < end && found == 0) {
			if (strcmp(p, event) == 0) {
				dcpc_create_probe(dcpc_pid, desc->dtpd_name,
				    event, umask, (uint32_t)val, flag);
				found = 1;
			}
			p += strlen(p) + 1;
		}

		kmem_free(cp, llen + 1);

		if (found)
			break;
	}

err:
	kmem_free(str, len + 1);
}

/*ARGSUSED*/
static void
dcpc_destroy(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;

	ASSERT(pp->dcpc_enabled == 0);
	kmem_free(pp, sizeof (dcpc_probe_t));
}
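
/*
 * Comment added for clarity: report the mode of the interrupted context to
 * the DTrace framework. A zero kernel profile PC indicates the overflow was
 * taken in user mode; DTRACE_MODE_NOPRIV_DROP asks the framework to drop
 * the firing, rather than deliver it, for consumers lacking the required
 * privilege.
 */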

/*ARGSUSED*/
static int
dcpc_mode(void *arg, dtrace_id_t id, void *parg)
{
	if (CPU->cpu_cpcprofile_pc == 0) {
		return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_USER);
	} else {
		return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_KERNEL);
	}
}

static void
dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno)
{
	kcpc_set_t *oset;
	int i;

	(void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name,
	    CPC_MAX_EVENT_LEN);
	set->ks_req[reqno].kr_config = NULL;
	set->ks_req[reqno].kr_index = reqno;
	set->ks_req[reqno].kr_picnum = -1;
	set->ks_req[reqno].kr_flags = pp->dcpc_flag;

	/*
	 * If a unit mask has been specified then detect which attribute
	 * the platform needs. For now, it's either "umask" or "emask".
	 */
	if (pp->dcpc_umask >= 0) {
		set->ks_req[reqno].kr_attr =
		    kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP);
		set->ks_req[reqno].kr_nattrs = 1;
		if (dcpc_mask_type & DCPC_UMASK)
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "umask", 5);
		else
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "emask", 5);
		set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask;
	} else {
		set->ks_req[reqno].kr_attr = NULL;
		set->ks_req[reqno].kr_nattrs = 0;
	}

	/*
	 * If this probe is enabled, obtain its current countdown value
	 * and use that. The CPUs cpc context might not exist yet if we
	 * are dealing with a CPU that is just coming online.
	 */
	if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) {
		oset = c->cpu_cpc_ctx->kc_set;

		for (i = 0; i < oset->ks_nreqs; i++) {
			if (strcmp(oset->ks_req[i].kr_event,
			    set->ks_req[reqno].kr_event) == 0) {
				set->ks_req[reqno].kr_preset =
				    *(oset->ks_req[i].kr_data);
			}
		}
	} else {
		set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval;
	}

	set->ks_nreqs++;
}

/*
 * Create a fresh request set for the enablings represented in the
 * 'dcpc_actv_reqs' array which contains the probes we want to be
 * in the set. This can be called for several reasons:
 *
 * 1) We are on a single or multi overflow platform and we have no
 *    current events so we can just create the set and initialize it.
 * 2) We are on a multi-overflow platform and we already have one or
 *    more existing events and we are adding a new enabling. Create a
 *    new set and copy old requests in and then add the new request.
 * 3) We are on a multi-overflow platform and we have just removed an
 *    enabling but we still have enablings which are valid. Create a new
 *    set and copy in still valid requests.
 */
static kcpc_set_t *
dcpc_create_set(cpu_t *c)
{
	int i, reqno = 0;
	int active_requests = 0;
	kcpc_set_t *set;

	/*
	 * First get a count of the number of currently active requests.
	 * Note that dcpc_actv_reqs[] should always reflect which requests
	 * we want to be in the set that is to be created. It is the
	 * responsibility of the caller of dcpc_create_set() to adjust that
	 * array accordingly beforehand.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL)
			active_requests++;
	}

	set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);

	set->ks_req =
	    kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP);

	set->ks_data =
	    kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP);

	/*
	 * Look for valid entries in the active requests array and populate
	 * the request set for any entries found.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL) {
			dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno);
			reqno++;
		}
	}

	return (set);
}
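
/*
 * Comment added for clarity: build a request set from dcpc_actv_reqs[],
 * assign and configure it, program it onto the given CPU (recording which
 * PIC each request was assigned to) and free any previously installed dcpc
 * context. Returns 0 on success or -1 if the requests could not be assigned
 * or configured.
 */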

static int
dcpc_program_cpu_event(cpu_t *c)
{
	int i, j, subcode;
	kcpc_ctx_t *ctx, *octx;
	kcpc_set_t *set;

	set = dcpc_create_set(c);

	set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP);
	ctx->kc_set = set;
	ctx->kc_cpuid = c->cpu_id;

	if (kcpc_assign_reqs(set, ctx) != 0)
		goto err;

	if (kcpc_configure_reqs(ctx, set, &subcode) != 0)
		goto err;

	for (i = 0; i < set->ks_nreqs; i++) {
		for (j = 0; j < cpc_ncounters; j++) {
			if (dcpc_actv_reqs[j] != NULL &&
			    strcmp(set->ks_req[i].kr_event,
			    dcpc_actv_reqs[j]->dcpc_event_name) == 0) {
				dcpc_actv_reqs[j]->dcpc_picno =
				    set->ks_req[i].kr_picnum;
			}
		}
	}

	/*
	 * If we already have an active enabling then save the current cpc
	 * context away.
	 */
	octx = c->cpu_cpc_ctx;

	kcpc_cpu_program(c, ctx);

	if (octx != NULL) {
		kcpc_set_t *oset = octx->kc_set;
		kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t));
		kcpc_free_configs(oset);
		kcpc_free_set(oset);
		kcpc_ctx_free(octx);
	}

	return (0);

err:
	/*
	 * We failed to configure this request up so free things up and
	 * get out.
	 */
	kcpc_free_configs(set);
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_free_set(set);
	kcpc_ctx_free(ctx);

	return (-1);
}

static void
dcpc_disable_cpu(cpu_t *c)
{
	kcpc_ctx_t *ctx;
	kcpc_set_t *set;

	/*
	 * Leave this CPU alone if it's already offline.
	 */
	if (c->cpu_flags & CPU_OFFLINE)
		return;

	/*
	 * Grab CPUs CPC context before kcpc_cpu_stop() stops counters and
	 * changes it.
	 */
	ctx = c->cpu_cpc_ctx;

	kcpc_cpu_stop(c, B_FALSE);

	set = ctx->kc_set;

	kcpc_free_configs(set);
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_free_set(set);
	kcpc_ctx_free(ctx);
}

/*
 * The dcpc_*_interrupts() routines are responsible for manipulating the
 * per-CPU dcpc interrupt state byte. The purpose of the state byte is to
 * synchronize processing of hardware overflow interrupts with configuration
 * changes made to the CPU performance counter subsystem by the dcpc
 * provider.
 *
 * The dcpc provider claims ownership of the overflow interrupt mechanism
 * by transitioning the state byte from DCPC_INTR_INACTIVE (indicating the
 * dcpc provider is not in use) to DCPC_INTR_FREE (the dcpc provider owns
 * the overflow mechanism and interrupts may be processed). Before modifying
 * a CPUs configuration state the state byte is transitioned from
 * DCPC_INTR_FREE to DCPC_INTR_CONFIG ("configuration in process" state).
 * The hardware overflow handler, kcpc_hw_overflow_intr(), will only process
 * an interrupt when a configuration is not in process (i.e. the state is
 * marked as free). During interrupt processing the state is set to
 * DCPC_INTR_PROCESSING by the overflow handler. When the last dcpc based
 * enabling is removed, the state byte is set to DCPC_INTR_INACTIVE to
 * indicate the dcpc provider is no longer interested in overflow interrupts.
 */
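
/*
 * Summarizing the transitions described above (illustrative only):
 *
 *	DCPC_INTR_INACTIVE -> DCPC_INTR_FREE	first dcpc enabling
 *	DCPC_INTR_FREE -> DCPC_INTR_CONFIG	configuration change begins
 *	DCPC_INTR_CONFIG -> DCPC_INTR_FREE	configuration change complete
 *	DCPC_INTR_FREE -> DCPC_INTR_PROCESSING	overflow interrupt in progress
 *	DCPC_INTR_FREE -> DCPC_INTR_INACTIVE	last dcpc enabling removed
 */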

static void
dcpc_block_interrupts(void)
{
	cpu_t *c = cpu_list;
	uint8_t *state;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

		while (atomic_cas_8(state, DCPC_INTR_FREE,
		    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
			continue;

	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Set all CPUs dcpc interrupt state to DCPC_INTR_FREE to indicate that
 * overflow interrupts can be processed safely.
 */
static void
dcpc_release_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Transition all CPUs dcpc interrupt state from DCPC_INTR_INACTIVE to
 * DCPC_INTR_FREE. This indicates that the dcpc provider is now responsible
 * for handling all overflow interrupt activity. Should only be called before
 * enabling the first dcpc based probe.
 */
static void
dcpc_claim_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state == DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Set all CPUs dcpc interrupt state to DCPC_INTR_INACTIVE to indicate that
 * the dcpc provider is no longer processing overflow interrupts. Only called
 * during removal of the last dcpc based enabling.
 */
static void
dcpc_surrender_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_INACTIVE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * dcpc_program_event() can be called owing to a new enabling or if a multi
 * overflow platform has disabled a request but needs to program the requests
 * that are still valid.
 *
 * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t
 * and a new request set which contains the new enabling and any old enablings
 * which are still valid (possible with multi-overflow platforms).
 */
static int
dcpc_program_event(dcpc_probe_t *pp)
{
	cpu_t *c;
	int ret = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();

	dcpc_block_interrupts();

	c = cpu_list;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		/*
		 * Stop counters but preserve existing DTrace CPC context
		 * if there is one.
		 *
		 * If we come here when the first event is programmed for a
		 * CPU, there should be no DTrace CPC context installed. In
		 * this case, kcpc_cpu_stop() will ensure that there is no
		 * other context on the CPU.
		 *
		 * If we add new enabling to the original one, the CPU should
		 * have the old DTrace CPC context which we need to keep around
		 * since dcpc_program_event() will add to it.
		 */
		if (c->cpu_cpc_ctx != NULL)
			kcpc_cpu_stop(c, B_TRUE);
	} while ((c = c->cpu_next) != cpu_list);

	dcpc_release_interrupts();

	/*
	 * If this enabling is being removed (in the case of a multi event
	 * capable system with more than one active enabling), we can now
	 * update the active request array to reflect the enablings that need
	 * to be reprogrammed.
	 */
	if (pp->dcpc_disabling == 1)
		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		ret = dcpc_program_cpu_event(c);
	} while ((c = c->cpu_next) != cpu_list && ret == 0);

	/*
	 * If dcpc_program_cpu_event() fails then it is because we couldn't
	 * configure the requests in the set for the CPU and not because of
	 * an error programming the hardware. If we have a failure here then
	 * we assume no CPUs have been programmed in the above step as they
	 * are all configured identically.
	 */
	if (ret != 0) {
		pp->dcpc_enabled = 0;
		kpreempt_enable();
		return (-1);
	}

	if (pp->dcpc_disabling != 1)
		pp->dcpc_enabled = 1;

	kpreempt_enable();

	return (0);
}
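
/*
 * Comment added for clarity: dcpc_enable() is the provider's enable entry
 * point. It claims a free slot in dcpc_actv_reqs[], takes the counters away
 * from capacity/utilization collection on first use and attempts to program
 * the new request set on every CPU, undoing all of the above if programming
 * fails.
 */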

/*ARGSUSED*/
static int
dcpc_enable(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;
	int i, found = 0;
	cpu_t *c;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Bail out if the counters are being used by a libcpc consumer.
	 */
	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (kcpc_cpuctx > 0) {
		rw_exit(&kcpc_cpuctx_lock);
		return (-1);
	}

	dtrace_cpc_in_use++;
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Locate this enabling in the first free entry of the active
	 * request array.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] == NULL) {
			dcpc_actv_reqs[i] = pp;
			pp->dcpc_actv_req_idx = i;
			found = 1;
			break;
		}
	}

	/*
	 * If we couldn't find a slot for this probe then there is no
	 * room at the inn.
	 */
	if (!found) {
		dtrace_cpc_in_use--;
		return (-1);
	}

	ASSERT(pp->dcpc_actv_req_idx >= 0);

	/*
	 * DTrace is taking over CPC contexts, so stop collecting
	 * capacity/utilization data for all CPUs.
	 */
	if (dtrace_cpc_in_use == 1)
		cu_disable();

	/*
	 * The following must hold true if we are to (attempt to) enable
	 * this request:
	 *
	 * 1) No enablings currently exist. We allow all platforms to
	 * proceed if this is true.
	 *
	 * OR
	 *
	 * 2) If the platform is multi overflow capable and there are
	 * fewer valid enablings than there are counters. There is no
	 * guarantee that a platform can accommodate as many events as
	 * it has counters for but we will at least try to program
	 * up to that many requests.
	 *
	 * The 'dcpc_enablings' variable is implicitly protected by locking
	 * provided by the DTrace framework and the cpu management framework.
	 */
	if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap &&
	    dcpc_enablings < cpc_ncounters)) {
		/*
		 * Before attempting to program the first enabling we need to
		 * invalidate any lwp-based contexts and lay claim to the
		 * overflow interrupt mechanism.
		 */
		if (dcpc_enablings == 0) {
			kcpc_invalidate_all();
			dcpc_claim_interrupts();
		}

		if (dcpc_program_event(pp) == 0) {
			dcpc_enablings++;
			return (0);
		}
	}

	/*
	 * If active enablings existed before we failed to enable this probe
	 * on a multi event capable platform then we need to restart counters
	 * as they will have been stopped in the attempted configuration. The
	 * context should now just contain the request prior to this failed
	 * enabling.
	 */
	if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) {
		c = cpu_list;

		ASSERT(dcpc_mult_ovf_cap == 1);
		do {
			/*
			 * Skip CPUs that are currently offline.
			 */
			if (c->cpu_flags & CPU_OFFLINE)
				continue;

			kcpc_cpu_program(c, c->cpu_cpc_ctx);
		} while ((c = c->cpu_next) != cpu_list);
	}

	/*
	 * Give up any claim to the overflow interrupt mechanism if no
	 * dcpc based enablings exist.
	 */
	if (dcpc_enablings == 0)
		dcpc_surrender_interrupts();

	dtrace_cpc_in_use--;
	dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();

	return (-1);
}

/*
 * If only one enabling is active then remove the context and free
 * everything up. If there are multiple enablings active then remove this
 * one, its associated meta-data and re-program the hardware.
 */
/*ARGSUSED*/
static void
dcpc_disable(void *arg, dtrace_id_t id, void *parg)
{
	cpu_t *c;
	dcpc_probe_t *pp = parg;

	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();

	/*
	 * This probe didn't actually make it as far as being fully enabled
	 * so we needn't do anything with it.
	 */
	if (pp->dcpc_enabled == 0) {
		/*
		 * If we actually allocated this request a slot in the
		 * request array but failed to enable it then remove the
		 * entry in the array.
		 */
		if (pp->dcpc_actv_req_idx >= 0) {
			dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
			pp->dcpc_actv_req_idx = pp->dcpc_picno =
			    pp->dcpc_disabling = -1;
		}

		kpreempt_enable();
		return;
	}

	/*
	 * If this is the only enabling then stop all the counters and
	 * free up the meta-data.
	 */
	if (dcpc_enablings == 1) {
		ASSERT(dtrace_cpc_in_use == 1);

		dcpc_block_interrupts();

		c = cpu_list;

		do {
			dcpc_disable_cpu(c);
		} while ((c = c->cpu_next) != cpu_list);

		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
		dcpc_surrender_interrupts();
	} else {
		/*
		 * This platform can support multiple overflow events and
		 * the enabling being disabled is not the last one. Remove this
		 * enabling and re-program the hardware with the new config.
		 */
		ASSERT(dcpc_mult_ovf_cap);
		ASSERT(dcpc_enablings > 1);

		pp->dcpc_disabling = 1;
		(void) dcpc_program_event(pp);
	}

	kpreempt_enable();

	dcpc_enablings--;
	dtrace_cpc_in_use--;
	pp->dcpc_enabled = 0;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();
}
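
/*
 * Comment added for clarity: CPU setup callback registered with
 * register_cpu_setup_func(). While dcpc tracing is active it deconfigures
 * the counters on a CPU going offline and programs the current request set
 * onto a CPU being set up or brought online.
 */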

/*ARGSUSED*/
static int
dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
{
	cpu_t *c;
	uint8_t *state;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (what) {
	case CPU_OFF:
		/*
		 * Offline CPUs are not allowed to take part so remove this
		 * CPU if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

			/*
			 * Indicate that a configuration is in process in
			 * order to stop overflow interrupts being processed
			 * on this CPU while we disable it.
			 */
			while (atomic_cas_8(state, DCPC_INTR_FREE,
			    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
				continue;

			dcpc_disable_cpu(c);

			/*
			 * Reset this CPUs interrupt state as the
			 * configuration has ended.
			 */
			cpu_core[c->cpu_id].cpuc_dcpc_intr_state =
			    DCPC_INTR_FREE;
			membar_producer();
		}
		break;

	case CPU_ON:
	case CPU_SETUP:
		/*
		 * This CPU is being initialized or brought online so program
		 * it with the current request set if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			(void) dcpc_program_cpu_event(c);
		}
		break;

	default:
		break;
	}

	return (0);
}

static dtrace_pattr_t dcpc_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};

static dtrace_pops_t dcpc_pops = {
	dcpc_provide,
	NULL,
	dcpc_enable,
	dcpc_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	dcpc_mode,
	dcpc_destroy
};

/*ARGSUSED*/
static int
dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}

/*ARGSUSED*/
static int
dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dcpc_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = NULL;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static int
dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (dtrace_unregister(dcpc_pid) != 0)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);

	mutex_enter(&cpu_lock);
	unregister_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *));

	kcpc_unregister_dcpc();

	return (DDI_SUCCESS);
}

static int
dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	uint_t caps;
	char *attrs;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (kcpc_pcbe_loaded() == -1)
		return (DDI_FAILURE);

	caps = kcpc_pcbe_capabilities();

	if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) {
		cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"
		    " on this processor");
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0,
	    DDI_PSEUDO, 0) == DDI_FAILURE ||
	    dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL,
	    NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	mutex_enter(&cpu_lock);
	register_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	dcpc_ovf_mask = (1 << cpc_ncounters) - 1;
	ASSERT(dcpc_ovf_mask != 0);

	if (caps & CPC_CAP_OVERFLOW_PRECISE)
		dcpc_mult_ovf_cap = 1;

	/*
	 * Determine which, if any, mask attribute the back-end can use.
	 */
	attrs = kcpc_list_attrs();
	if (strstr(attrs, "umask") != NULL)
		dcpc_mask_type |= DCPC_UMASK;
	else if (strstr(attrs, "emask") != NULL)
		dcpc_mask_type |= DCPC_EMASK;

	/*
	 * The dcpc_actv_reqs array is used to store the requests that
	 * we currently have programmed. The order of requests in this
	 * array is not necessarily the order that the event appears in
	 * the kcpc_request_t array. Once entered into a slot in the array
	 * the entry is not moved until it's removed.
	 */
	dcpc_actv_reqs =
	    kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP);

	dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT);

	kcpc_register_dcpc(dcpc_fire);

	ddi_report_dev(devi);
	dcpc_devi = devi;

	return (DDI_SUCCESS);
}

static struct cb_ops dcpc_cb_ops = {
	dcpc_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dcpc_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	dcpc_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dcpc_attach,		/* attach */
	dcpc_detach,		/* detach */
	nodev,			/* reset */
	&dcpc_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type */
	"DTrace CPC Module",	/* name of module */
	&dcpc_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}