/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/cap_util.h>
#include <sys/cpc_pcbe.h>
#include <sys/cpc_impl.h>
#include <sys/dtrace_impl.h>
/*
 * DTrace CPU Performance Counter Provider
 * ---------------------------------------
 *
 * The DTrace cpc provider allows DTrace consumers to access the CPU
 * performance counter overflow mechanism of a CPU. The configuration
 * presented in a probe specification is programmed into the performance
 * counter hardware of all available CPUs on a system. Programming the
 * hardware causes a counter on each CPU to begin counting events of the
 * given type. When the specified number of events have occurred, an overflow
 * interrupt will be generated and the probe is fired.
 *
 * The required configuration for the performance counter is encoded into
 * the probe specification and this includes the performance counter event
 * name, processor mode, overflow rate and an optional unit mask.
 *
 * Most processors provide several counters (PICs) which can count all or a
 * subset of the events available for a given CPU. However, when overflow
 * profiling is being used, not all CPUs can detect which counter generated the
 * overflow interrupt. In this case we cannot reliably determine which counter
 * overflowed and we therefore only allow such CPUs to configure one event at
 * a time. Processors that can determine the counter which overflowed are
 * allowed to program as many events at one time as possible (in theory up to
 * the number of instrumentation counters supported by that platform).
 * Therefore, multiple consumers can enable multiple probes at the same time
 * on such platforms. Platforms which cannot determine the source of an
 * overflow interrupt are only allowed to program a single event at one time.
 *
 * The performance counter hardware is made available to consumers on a
 * first-come, first-served basis. Only a finite amount of hardware resource
 * is available and, while we make every attempt to accommodate requests from
 * consumers, we must deny requests when hardware resources have been
 * exhausted. A consumer will fail to enable probes when resources are
 * currently in use.
 *
 * The cpc provider contends for shared hardware resources along with other
 * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(8)).
 * Only one such consumer can use the performance counters at any one time and
 * counters are made available on a first-come, first-served basis. As with
 * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g.
 * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP
 * counter contexts to be invalidated.
 */
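
/*
 * Illustrative usage sketch (a hypothetical invocation; it assumes the
 * platform's back end offers the generic PAPI_tot_ins event): a consumer
 * could enable an overflow probe with
 *
 *	dtrace -n 'cpc:::PAPI_tot_ins-user-10000 { @[execname] = count(); }'
 *
 * which fires the probe, and bumps the aggregation, roughly once for every
 * 10000 user-mode instructions retired on each CPU.
 */
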
typedef struct dcpc_probe {
	char		dcpc_event_name[CPC_MAX_EVENT_LEN];
	int		dcpc_flag;		/* flags (USER/SYS) */
	uint32_t	dcpc_ovfval;		/* overflow value */
	int64_t		dcpc_umask;		/* umask/emask for this event */
	int		dcpc_picno;		/* pic this event is programmed in */
	int		dcpc_enabled;		/* probe is actually enabled? */
	int		dcpc_disabling;		/* probe is currently being disabled */
	dtrace_id_t	dcpc_id;		/* probeid this request is enabling */
	int		dcpc_actv_req_idx;	/* idx into dcpc_actv_reqs[] */
} dcpc_probe_t;
static dev_info_t			*dcpc_devi;
static dtrace_provider_id_t		dcpc_pid;
static dcpc_probe_t			**dcpc_actv_reqs;
static uint32_t				dcpc_enablings = 0;
static int				dcpc_ovf_mask = 0;
static int				dcpc_mult_ovf_cap = 0;
static int				dcpc_mask_type = 0;
/*
 * When the dcpc provider is loaded, dcpc_min_overflow is set to either
 * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in
 * the dcpc.conf file. Decrease this value to set probes with smaller
 * overflow values. Remember that very small values could render a system
 * unusable with frequently occurring events.
 */
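
/*
 * For instance (an illustrative sketch using standard driver.conf property
 * syntax), a dcpc.conf containing the line
 *
 *	dcpc-min-overflow=20000;
 *
 * would raise the minimum permissible overflow rate to 20000 events.
 */
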
#define	DCPC_MIN_OVF_DEFAULT		5000
static uint32_t				dcpc_min_overflow;

static int dcpc_aframes = 0;	/* override for artificial frame setting */
#if defined(__x86)
#define	DCPC_ARTIFICIAL_FRAMES	8
#elif defined(__sparc)
#define	DCPC_ARTIFICIAL_FRAMES	2
#endif
/*
 * Called from the platform overflow interrupt handler. 'bitmap' is a mask
 * which contains the pic(s) that have overflowed.
 */
static void
dcpc_fire(uint64_t bitmap)
{
	int i;

	/*
	 * No counter was marked as overflowing. Shout about it and get out.
	 */
	if ((bitmap & dcpc_ovf_mask) == 0) {
		cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n");
		return;
	}

	/*
	 * This is the common case of a processor that doesn't support
	 * multiple overflow events. Such systems are only allowed a single
	 * enabling and therefore we just look for the first entry in
	 * the active request array.
	 */
	if (!dcpc_mult_ovf_cap) {
		for (i = 0; i < cpc_ncounters; i++) {
			if (dcpc_actv_reqs[i] != NULL) {
				dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
				    CPU->cpu_cpcprofile_pc,
				    CPU->cpu_cpcprofile_upc, 0, 0, 0);
				return;
			}
		}
		return;
	}

	/*
	 * This is a processor capable of handling multiple overflow events.
	 * Iterate over the array of active requests and locate the counters
	 * that overflowed (note: it is possible for more than one counter to
	 * have overflowed at the same time).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL &&
		    (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) {
			dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
			    CPU->cpu_cpcprofile_pc,
			    CPU->cpu_cpcprofile_upc, 0, 0, 0);
		}
	}
}
static void
dcpc_create_probe(dtrace_provider_id_t id, const char *probename,
    char *eventname, int64_t umask, uint32_t ovfval, char flag)
{
	dcpc_probe_t *pp;
	int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes();

	if (dcpc_aframes)
		nr_frames = dcpc_aframes;

	if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0)
		return;

	pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP);
	(void) strncpy(pp->dcpc_event_name, eventname,
	    sizeof (pp->dcpc_event_name) - 1);
	pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0';
	pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT;
	pp->dcpc_ovfval = ovfval;
	pp->dcpc_umask = umask;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename,
	    nr_frames, pp);
}
static void
dcpc_provide(void *arg, const dtrace_probedesc_t *desc)
{
	/*
	 * The format of a probe is:
	 *
	 *	event_name-mode-{optional_umask}-overflow_rate
	 * e.g.
	 *	DC_refill_from_system-user-0x1e-50000, or,
	 *	DC_refill_from_system-all-10000
	 */
	char *str, *end, *p;
	int i, len, flag = 0;
	char event[CPC_MAX_EVENT_LEN];
	long umask = -1, val = 0;
	/*
	 * The 'cpc' provider offers no probes by default.
	 */
	if (desc == NULL)
		return;

	len = strlen(desc->dtpd_name);
	p = str = kmem_alloc(len + 1, KM_SLEEP);
	(void) strcpy(str, desc->dtpd_name);
	/*
	 * We have a poor man's strtok() going on here. Replace any hyphens
	 * in the probe name with NULL characters in order to make it
	 * easy to parse the string with regular string functions.
	 */
	for (i = 0; i < len; i++) {
		if (str[i] == '-')
			str[i] = '\0';
	}
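
	/*
	 * For example (purely illustrative), a probe name such as
	 * "DC_refill_from_system-user-0x1e-50000" is split in place into
	 * the tokens "DC_refill_from_system", "user", "0x1e" and "50000".
	 */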
	/*
	 * The first part of the string must be either a platform event
	 * name or a generic event name.
	 */
	(void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1);
	event[CPC_MAX_EVENT_LEN - 1] = '\0';

	/*
	 * The next part of the name is the mode specification. Valid
	 * settings are "user", "kernel" or "all".
	 */
	p += strlen(p) + 1;

	if (strcmp(p, "user") == 0)
		flag |= CPC_COUNT_USER;
	else if (strcmp(p, "kernel") == 0)
		flag |= CPC_COUNT_SYSTEM;
	else if (strcmp(p, "all") == 0)
		flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM;
	else
		goto err;
	/*
	 * Next we either have a mask specification followed by an overflow
	 * rate or just an overflow rate on its own.
	 */
	p += strlen(p) + 1;
	if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
		/*
		 * A unit mask can only be specified if:
		 * 1) this performance counter back end supports masks.
		 * 2) the specified event is platform specific.
		 * 3) a valid hex number is converted.
		 * 4) no extraneous characters follow the mask specification.
		 */
		if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 &&
		    ddi_strtol(p, &end, 16, &umask) == 0 &&
		    end == p + strlen(p)) {
			p += strlen(p) + 1;
		} else {
			goto err;
		}
	}

	/*
	 * This final part must be an overflow value which has to be greater
	 * than the minimum permissible overflow rate.
	 */
	if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) ||
	    val < dcpc_min_overflow)
		goto err;
	/*
	 * Validate the event and create the probe.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		char *events, *cp, *p, *end;
		int found = 0, j;
		size_t llen;

		if ((events = kcpc_list_events(i)) == NULL)
			goto err;

		llen = strlen(events);
		p = cp = ddi_strdup(events, KM_NOSLEEP);
		end = cp + llen;

		for (j = 0; j < llen; j++) {
			if (p[j] == ',')
				p[j] = '\0';
		}

		while (p < end && found == 0) {
			if (strcmp(p, event) == 0) {
				dcpc_create_probe(dcpc_pid, desc->dtpd_name,
				    event, umask, (uint32_t)val, flag);
				found = 1;
			}
			p += strlen(p) + 1;
		}

		kmem_free(cp, llen + 1);
	}
err:
	kmem_free(str, len + 1);
}
static void
dcpc_destroy(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;

	ASSERT(pp->dcpc_enabled == 0);
	kmem_free(pp, sizeof (dcpc_probe_t));
}
static int
dcpc_mode(void *arg, dtrace_id_t id, void *parg)
{
	if (CPU->cpu_cpcprofile_pc == 0) {
		return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_USER);
	} else {
		return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_KERNEL);
	}
}
static void
dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno)
{
	kcpc_set_t *oset;
	int i;

	(void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name,
	    CPC_MAX_EVENT_LEN);
	set->ks_req[reqno].kr_config = NULL;
	set->ks_req[reqno].kr_index = reqno;
	set->ks_req[reqno].kr_picnum = -1;
	set->ks_req[reqno].kr_flags = pp->dcpc_flag;
	/*
	 * If a unit mask has been specified then detect which attribute
	 * the platform needs. For now, it's either "umask" or "emask".
	 */
	if (pp->dcpc_umask >= 0) {
		set->ks_req[reqno].kr_attr =
		    kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP);
		set->ks_req[reqno].kr_nattrs = 1;
		if (dcpc_mask_type & DCPC_UMASK)
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "umask", 5);
		else
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "emask", 5);
		set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask;
	} else {
		set->ks_req[reqno].kr_attr = NULL;
		set->ks_req[reqno].kr_nattrs = 0;
	}
	/*
	 * If this probe is enabled, obtain its current countdown value
	 * and use that. The CPU's cpc context might not exist yet if we
	 * are dealing with a CPU that is just coming online.
	 */
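	/*
	 * (Illustrative note on the arithmetic below: a fresh enabling is
	 * preset to UINT64_MAX - dcpc_ovfval, so that the virtualized
	 * counter wraps, and raises the overflow interrupt, after roughly
	 * dcpc_ovfval further events of the requested type.)
	 */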
	if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) {
		oset = c->cpu_cpc_ctx->kc_set;

		for (i = 0; i < oset->ks_nreqs; i++) {
			if (strcmp(oset->ks_req[i].kr_event,
			    set->ks_req[reqno].kr_event) == 0) {
				set->ks_req[reqno].kr_preset =
				    *(oset->ks_req[i].kr_data);
			}
		}
	} else {
		set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval;
	}
}
/*
 * Create a fresh request set for the enablings represented in the
 * 'dcpc_actv_reqs' array which contains the probes we want to be
 * in the set. This can be called for several reasons:
 *
 * 1) We are on a single or multi overflow platform and we have no
 *    current events so we can just create the set and initialize it.
 * 2) We are on a multi-overflow platform and we already have one or
 *    more existing events and we are adding a new enabling. Create a
 *    new set and copy old requests in and then add the new request.
 * 3) We are on a multi-overflow platform and we have just removed an
 *    enabling but we still have enablings which are valid. Create a new
 *    set and copy in still valid requests.
 */
static kcpc_set_t *
dcpc_create_set(cpu_t *c)
{
	int i, reqno = 0;
	int active_requests = 0;
	kcpc_set_t *set;

	/*
	 * First get a count of the number of currently active requests.
	 * Note that dcpc_actv_reqs[] should always reflect which requests
	 * we want to be in the set that is to be created. It is the
	 * responsibility of the caller of dcpc_create_set() to adjust that
	 * array accordingly beforehand.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL)
			active_requests++;
	}

	set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);

	set->ks_req =
	    kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP);

	set->ks_data =
	    kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP);

	/*
	 * Look for valid entries in the active requests array and populate
	 * the request set for any entries found.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL) {
			dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno);
			reqno++;
		}
	}

	set->ks_nreqs = active_requests;

	return (set);
}
static int
dcpc_program_cpu_event(cpu_t *c)
{
	int i, j, subcode;
	kcpc_ctx_t *ctx, *octx;
	kcpc_set_t *set;

	set = dcpc_create_set(c);

	set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP);
	ctx->kc_set = set;
	ctx->kc_cpuid = c->cpu_id;

	if (kcpc_assign_reqs(set, ctx) != 0)
		goto err;

	if (kcpc_configure_reqs(ctx, set, &subcode) != 0)
		goto err;

	for (i = 0; i < set->ks_nreqs; i++) {
		for (j = 0; j < cpc_ncounters; j++) {
			if (dcpc_actv_reqs[j] != NULL &&
			    strcmp(set->ks_req[i].kr_event,
			    dcpc_actv_reqs[j]->dcpc_event_name) == 0) {
				dcpc_actv_reqs[j]->dcpc_picno =
				    set->ks_req[i].kr_picnum;
			}
		}
	}

	/*
	 * If we already have an active enabling then save the current cpc
	 * context so that it can be freed once the new context has been
	 * programmed.
	 */
	octx = c->cpu_cpc_ctx;

	kcpc_cpu_program(c, ctx);

	if (octx != NULL) {
		kcpc_set_t *oset = octx->kc_set;

		kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t));
		kcpc_free_configs(oset);
	}

	return (0);

err:
	/*
	 * We failed to configure this request up so free things up and
	 * get out.
	 */
	kcpc_free_configs(set);
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));

	return (-1);
}
static void
dcpc_disable_cpu(cpu_t *c)
{
	kcpc_ctx_t *ctx;
	kcpc_set_t *set;

	/*
	 * Leave this CPU alone if it's already offline.
	 */
	if (c->cpu_flags & CPU_OFFLINE)
		return;

	/*
	 * Grab the CPU's CPC context before kcpc_cpu_stop() stops counters
	 * and changes the context.
	 */
	ctx = c->cpu_cpc_ctx;

	kcpc_cpu_stop(c, B_FALSE);

	set = ctx->kc_set;

	kcpc_free_configs(set);
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
}
/*
 * The dcpc_*_interrupts() routines are responsible for manipulating the
 * per-CPU dcpc interrupt state byte. The purpose of the state byte is to
 * synchronize processing of hardware overflow interrupts with configuration
 * changes made to the CPU performance counter subsystem by the dcpc provider.
 *
 * The dcpc provider claims ownership of the overflow interrupt mechanism
 * by transitioning the state byte from DCPC_INTR_INACTIVE (indicating the
 * dcpc provider is not in use) to DCPC_INTR_FREE (the dcpc provider owns the
 * overflow mechanism and interrupts may be processed). Before modifying
 * a CPU's configuration state the state byte is transitioned from
 * DCPC_INTR_FREE to DCPC_INTR_CONFIG ("configuration in process" state).
 * The hardware overflow handler, kcpc_hw_overflow_intr(), will only process
 * an interrupt when a configuration is not in process (i.e. the state is
 * marked as free). During interrupt processing the state is set to
 * DCPC_INTR_PROCESSING by the overflow handler. When the last dcpc based
 * enabling is removed, the state byte is set to DCPC_INTR_INACTIVE to indicate
 * the dcpc provider is no longer interested in overflow interrupts.
 */
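
/*
 * A compact sketch of the transitions described above (illustrative summary
 * only):
 *
 *	INACTIVE   --claim-->        FREE        (first dcpc enabling)
 *	FREE       --block-->        CONFIG      (configuration in process)
 *	CONFIG     --release-->      FREE        (configuration complete)
 *	FREE       --interrupt-->    PROCESSING  (overflow handler running)
 *	PROCESSING --handler done--> FREE
 *	FREE       --surrender-->    INACTIVE    (last dcpc enabling removed)
 */
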
static void
dcpc_block_interrupts(void)
{
	cpu_t *c = cpu_list;
	uint8_t *state;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

		while (atomic_cas_8(state, DCPC_INTR_FREE,
		    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
			continue;

	} while ((c = c->cpu_next) != cpu_list);
}
/*
 * Set all CPUs' dcpc interrupt state to DCPC_INTR_FREE to indicate that
 * overflow interrupts can be processed safely.
 */
static void
dcpc_release_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
	} while ((c = c->cpu_next) != cpu_list);
}
/*
 * Transition all CPUs' dcpc interrupt state from DCPC_INTR_INACTIVE to
 * DCPC_INTR_FREE. This indicates that the dcpc provider is now
 * responsible for handling all overflow interrupt activity. Should only be
 * called before enabling the first dcpc based probe.
 */
static void
dcpc_claim_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state == DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
	} while ((c = c->cpu_next) != cpu_list);
}
/*
 * Set all CPUs' dcpc interrupt state to DCPC_INTR_INACTIVE to indicate that
 * the dcpc provider is no longer processing overflow interrupts. Only called
 * during removal of the last dcpc based enabling.
 */
static void
dcpc_surrender_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_INACTIVE;
	} while ((c = c->cpu_next) != cpu_list);
}
/*
 * dcpc_program_event() can be called owing to a new enabling or if a multi
 * overflow platform has disabled a request but needs to program the requests
 * that are still valid.
 *
 * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t
 * and a new request set which contains the new enabling and any old enablings
 * which are still valid (possible with multi-overflow platforms).
 */
static int
dcpc_program_event(dcpc_probe_t *pp)
{
	cpu_t *c;
	int ret = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	dcpc_block_interrupts();

	c = cpu_list;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		/*
		 * Stop counters but preserve the existing DTrace CPC context.
		 *
		 * If we come here when the first event is programmed for a
		 * CPU, there should be no DTrace CPC context installed. In
		 * this case, kcpc_cpu_stop() will ensure that there is no
		 * other context on the CPU.
		 *
		 * If we add new enabling to the original one, the CPU should
		 * have the old DTrace CPC context which we need to keep around
		 * since dcpc_program_event() will add to it.
		 */
		if (c->cpu_cpc_ctx != NULL)
			kcpc_cpu_stop(c, B_TRUE);
	} while ((c = c->cpu_next) != cpu_list);

	dcpc_release_interrupts();

	/*
	 * If this enabling is being removed (in the case of a multi event
	 * capable system with more than one active enabling), we can now
	 * update the active request array to reflect the enablings that need
	 * to be reprogrammed.
	 */
	if (pp->dcpc_disabling == 1)
		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;

	c = cpu_list;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		ret = dcpc_program_cpu_event(c);
	} while ((c = c->cpu_next) != cpu_list && ret == 0);

	/*
	 * If dcpc_program_cpu_event() fails then it is because we couldn't
	 * configure the requests in the set for the CPU and not because of
	 * an error programming the hardware. If we have a failure here then
	 * we assume no CPUs have been programmed in the above step as they
	 * are all configured identically.
	 */
	if (ret != 0) {
		pp->dcpc_enabled = 0;
		return (-1);
	}

	if (pp->dcpc_disabling != 1)
		pp->dcpc_enabled = 1;

	return (0);
}
static int
dcpc_enable(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;
	cpu_t *c;
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Bail out if the counters are being used by a libcpc consumer.
	 */
	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (kcpc_cpuctx > 0) {
		rw_exit(&kcpc_cpuctx_lock);
		return (-1);
	}

	dtrace_cpc_in_use++;
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Locate this enabling in the first free entry of the active
	 * request array.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] == NULL) {
			dcpc_actv_reqs[i] = pp;
			pp->dcpc_actv_req_idx = i;
			break;
		}
	}

	/*
	 * If we couldn't find a slot for this probe then there is no
	 * free entry available and we must fail the enabling.
	 */
	if (i == cpc_ncounters) {
		dtrace_cpc_in_use--;
		return (-1);
	}

	ASSERT(pp->dcpc_actv_req_idx >= 0);

	/*
	 * DTrace is taking over CPC contexts, so stop collecting
	 * capacity/utilization data for all CPUs.
	 */
	if (dtrace_cpc_in_use == 1)
		cu_disable();
	/*
	 * The following must hold true if we are to (attempt to) enable
	 * this request:
	 *
	 * 1) No enablings currently exist. We allow all platforms to
	 * proceed if this is true.
	 *
	 * 2) If the platform is multi overflow capable and there are
	 * fewer valid enablings than there are counters. There is no
	 * guarantee that a platform can accommodate as many events as
	 * it has counters for but we will at least try to program
	 * up to that many requests.
	 *
	 * The 'dcpc_enablings' variable is implicitly protected by locking
	 * provided by the DTrace framework and the cpu management framework.
	 */
	if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap &&
	    dcpc_enablings < cpc_ncounters)) {
		/*
		 * Before attempting to program the first enabling we need to
		 * invalidate any lwp-based contexts and lay claim to the
		 * overflow interrupt mechanism.
		 */
		if (dcpc_enablings == 0) {
			kcpc_invalidate_all();
			dcpc_claim_interrupts();
		}

		if (dcpc_program_event(pp) == 0) {
			dcpc_enablings++;
			return (0);
		}
	}

	/*
	 * If active enablings existed before we failed to enable this probe
	 * on a multi event capable platform then we need to restart counters
	 * as they will have been stopped in the attempted configuration. The
	 * context should now just contain the request prior to this failed
	 * enabling.
	 */
	if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) {
		c = cpu_list;

		ASSERT(dcpc_mult_ovf_cap == 1);
		do {
			/*
			 * Skip CPUs that are currently offline.
			 */
			if (c->cpu_flags & CPU_OFFLINE)
				continue;

			kcpc_cpu_program(c, c->cpu_cpc_ctx);
		} while ((c = c->cpu_next) != cpu_list);
	}

	/*
	 * Give up any claim to the overflow interrupt mechanism if no
	 * dcpc based enablings exist.
	 */
	if (dcpc_enablings == 0)
		dcpc_surrender_interrupts();

	dtrace_cpc_in_use--;
	dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();

	return (-1);
}
/*
 * If only one enabling is active then remove the context and free
 * everything up. If there are multiple enablings active then remove this
 * one, its associated meta-data and re-program the hardware.
 */
static void
dcpc_disable(void *arg, dtrace_id_t id, void *parg)
{
	cpu_t *c;
	dcpc_probe_t *pp = parg;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * This probe didn't actually make it as far as being fully enabled
	 * so we needn't do anything with it.
	 */
	if (pp->dcpc_enabled == 0) {
		/*
		 * If we actually allocated this request a slot in the
		 * request array but failed to enable it then remove the
		 * entry in the array.
		 */
		if (pp->dcpc_actv_req_idx >= 0) {
			dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
			pp->dcpc_actv_req_idx = pp->dcpc_picno =
			    pp->dcpc_disabling = -1;
		}

		return;
	}

	/*
	 * If this is the only enabling then stop all the counters and
	 * free up the meta-data.
	 */
	if (dcpc_enablings == 1) {
		ASSERT(dtrace_cpc_in_use == 1);

		dcpc_block_interrupts();

		c = cpu_list;

		do {
			dcpc_disable_cpu(c);
		} while ((c = c->cpu_next) != cpu_list);

		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
		dcpc_surrender_interrupts();
	} else {
		/*
		 * This platform can support multiple overflow events and
		 * the enabling being disabled is not the last one. Remove this
		 * enabling and re-program the hardware with the new config.
		 */
		ASSERT(dcpc_mult_ovf_cap);
		ASSERT(dcpc_enablings > 1);

		pp->dcpc_disabling = 1;
		(void) dcpc_program_event(pp);
	}

	dcpc_enablings--;
	dtrace_cpc_in_use--;
	pp->dcpc_enabled = 0;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();
}
static int
dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
{
	cpu_t *c;
	uint8_t *state;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (what) {
	case CPU_OFF:
		/*
		 * Offline CPUs are not allowed to take part so remove this
		 * CPU if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

			/*
			 * Indicate that a configuration is in process in
			 * order to stop overflow interrupts being processed
			 * on this CPU while we disable it.
			 */
			while (atomic_cas_8(state, DCPC_INTR_FREE,
			    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
				continue;

			dcpc_disable_cpu(c);

			/*
			 * Reset this CPU's interrupt state as the
			 * configuration has ended.
			 */
			cpu_core[c->cpu_id].cpuc_dcpc_intr_state =
			    DCPC_INTR_FREE;
		}
		break;

	case CPU_ON:
	case CPU_SETUP:
		/*
		 * This CPU is being initialized or brought online so program
		 * it with the current request set if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			(void) dcpc_program_cpu_event(c);
		}
		break;

	default:
		break;
	}

	return (0);
}
static dtrace_pattr_t dcpc_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};
static dtrace_pops_t dcpc_pops = {
	dcpc_provide,		/* dtps_provide */
	NULL,			/* dtps_provide_module */
	dcpc_enable,		/* dtps_enable */
	dcpc_disable,		/* dtps_disable */
	NULL,			/* dtps_suspend */
	NULL,			/* dtps_resume */
	NULL,			/* dtps_getargdesc */
	NULL,			/* dtps_getargval */
	dcpc_mode,		/* dtps_mode */
	dcpc_destroy		/* dtps_destroy */
};
static int
dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}
static int
dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dcpc_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}

	return (error);
}
static int
dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (dtrace_unregister(dcpc_pid) != 0)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);

	mutex_enter(&cpu_lock);
	unregister_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *));

	kcpc_unregister_dcpc();

	return (DDI_SUCCESS);
}
static int
dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	uint_t caps;
	char *attrs;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (kcpc_pcbe_loaded() == -1)
		return (DDI_FAILURE);

	caps = kcpc_pcbe_capabilities();

	if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) {
		cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"
		    " on this processor");
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0,
	    DDI_PSEUDO, 0) == DDI_FAILURE ||
	    dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL,
	    NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	mutex_enter(&cpu_lock);
	register_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	dcpc_ovf_mask = (1 << cpc_ncounters) - 1;
	ASSERT(dcpc_ovf_mask != 0);

	if (caps & CPC_CAP_OVERFLOW_PRECISE)
		dcpc_mult_ovf_cap = 1;

	/*
	 * Determine which, if any, mask attribute the back-end can use.
	 */
	attrs = kcpc_list_attrs();
	if (strstr(attrs, "umask") != NULL)
		dcpc_mask_type |= DCPC_UMASK;
	else if (strstr(attrs, "emask") != NULL)
		dcpc_mask_type |= DCPC_EMASK;

	/*
	 * The dcpc_actv_reqs array is used to store the requests that
	 * we currently have programmed. The order of requests in this
	 * array is not necessarily the order that the event appears in
	 * the kcpc_request_t array. Once entered into a slot in the array
	 * the entry is not moved until it's removed.
	 */
	dcpc_actv_reqs =
	    kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP);

	dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT);

	kcpc_register_dcpc(dcpc_fire);

	ddi_report_dev(devi);
	dcpc_devi = devi;

	return (DDI_SUCCESS);
}
static struct cb_ops dcpc_cb_ops = {
	dcpc_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};
static struct dev_ops dcpc_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dcpc_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dcpc_attach,		/* attach */
	dcpc_detach,		/* detach */
	nodev,			/* reset */
	&dcpc_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed	/* quiesce */
};
/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type */
	"DTrace CPC Module",	/* name of module */
	&dcpc_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}