/* Performance event support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */
#define MAX_HWEVENTS			2
#define MAX_PERIOD			((1UL << 32) - 1)

#define PIC_UPPER_INDEX			0
#define PIC_LOWER_INDEX			1
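/* Illustrative sketch only (not used by the driver): both 32-bit
 * counters arrive packed in one 64-bit PIC value, so splitting them
 * is just a shift and a mask.  The helper name below is made up for
 * illustration.
 */
static inline void example_split_pic(u64 pic, u32 *upper, u32 *lower)
{
	*upper = pic >> 32;		/* PIC_UPPER_INDEX counter */
	*lower = pic & 0xffffffff;	/* PIC_LOWER_INDEX counter */
}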
struct cpu_hw_events {
	struct perf_event	*events[MAX_HWEVENTS];
	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	int			enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
struct perf_event_map {
	u16	encoding;
	u8	pic_mask;
#define PIC_NONE	0x00
#define PIC_UPPER	0x01
#define PIC_LOWER	0x02
};

static unsigned long perf_event_encode(const struct perf_event_map *pmap)
{
	return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
}

static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk)
{
	*msk = val & 0xff;
	*enc = val >> 16;
}
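/* Illustrative usage sketch: encode/decode are inverses, packing the
 * 16-bit event code above the 8-bit counter mask.  This check is for
 * illustration only and is not part of the driver.
 */
static inline int example_encode_roundtrip(const struct perf_event_map *pmap)
{
	u16 enc;
	u8 msk;

	perf_event_decode(perf_event_encode(pmap), &enc, &msk);
	return enc == pmap->encoding && msk == pmap->pic_mask;
}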
#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED	0xfffe
#define CACHE_OP_NONSENSE	0xffff

typedef struct perf_event_map cache_map_t
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
struct sparc_pmu {
	const struct perf_event_map	*(*event_map)(int);
	const cache_map_t		*cache_map;
	int				max_events;
	int				upper_shift;
	int				lower_shift;
	int				event_mask;
	int				hv_bit;
	int				irq_bit;
	int				upper_nop;
	int				lower_nop;
};
static const struct perf_event_map ultra3_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
	return &ultra3_perfmon_event_map[event_id];
}
static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
		[ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
};
static const struct sparc_pmu ultra3_pmu = {
	.event_map	= ultra3_event_map,
	.cache_map	= &ultra3_cache_map,
	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map),
};
/* Niagara1 is very limited.  The upper PIC is hard-locked to count
 * only instructions, so it is free running which creates all kinds of
 * problems.  Some hardware designs make one wonder if the creator
 * even looked at how this stuff gets used by software.
 */
static const struct perf_event_map niagara1_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
};
static const struct perf_event_map *niagara1_event_map(int event_id)
{
	return &niagara1_perfmon_event_map[event_id];
}
static const cache_map_t niagara1_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
		[C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
		[ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
};
static const struct sparc_pmu niagara1_pmu = {
	.event_map	= niagara1_event_map,
	.cache_map	= &niagara1_cache_map,
	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map),
};
static const struct perf_event_map niagara2_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
	return &niagara2_perfmon_event_map[event_id];
}
static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
		[ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
		[ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED },
	},
},
};
static const struct sparc_pmu niagara2_pmu = {
	.event_map	= niagara2_event_map,
	.cache_map	= &niagara2_cache_map,
	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
};
static const struct sparc_pmu *sparc_pmu __read_mostly;
static u64 event_encoding(u64 event_id, int idx)
{
	if (idx == PIC_UPPER_INDEX)
		event_id <<= sparc_pmu->upper_shift;
	else
		event_id <<= sparc_pmu->lower_shift;
	return event_id;
}

static u64 mask_for_index(int idx)
{
	return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
	return event_encoding(idx == PIC_UPPER_INDEX ?
			      sparc_pmu->upper_nop :
			      sparc_pmu->lower_nop, idx);
}
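/* Illustrative sketch only: the real per-counter field offsets live
 * in the per-chip sparc_pmu descriptor; the made-up values below just
 * show how an event code gets positioned into the PCR field for the
 * chosen counter.  Nothing here is part of the driver.
 */
static inline u64 example_pcr_event_field(u64 event_code, int idx)
{
	int example_upper_shift = 11;	/* made-up value for illustration */
	int example_lower_shift = 4;	/* made-up value for illustration */

	return event_code << (idx == PIC_UPPER_INDEX ?
			      example_upper_shift : example_lower_shift);
}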
static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 val, mask = mask_for_index(idx);

	val = cpuc->pcr;
	val &= ~mask;
	val |= hwc->config;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}
static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
	u64 mask = mask_for_index(idx);
	u64 nop = nop_for_index(idx);
	u64 val;

	val = cpuc->pcr;
	val &= ~mask;
	val |= nop;
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}
void hw_perf_enable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;
	int i;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;

	val = cpuc->pcr;

	for (i = 0; i < MAX_HWEVENTS; i++) {
		struct perf_event *cp = cpuc->events[i];
		struct hw_perf_event *hwc;

		if (!cp)
			continue;
		hwc = &cp->hw;
		val |= hwc->config_base;
	}

	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}
void hw_perf_disable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;

	val = cpuc->pcr;
	val &= ~(PCR_UTRACE | PCR_STRACE |
		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
	cpuc->pcr = val;

	pcr_ops->write(cpuc->pcr);
}
static u32 read_pmc(int idx)
{
	u64 val = 0;

	read_pic(val);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	read_pic(pic);
	pic &= ~mask;
	pic |= val;
	write_pic(pic);
}
static int sparc_perf_event_set_period(struct perf_event *event,
				       struct hw_perf_event *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	if (left > MAX_PERIOD)
		left = MAX_PERIOD;

	atomic64_set(&hwc->prev_count, (u64)-left);

	write_pmc(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}
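/* Worked example (illustration only): for a sampling period of N
 * events the counter is programmed with 2^32 - N, i.e. (u32)(-N), so
 * the overflow interrupt fires after exactly N increments.  The
 * helper below is not part of the driver.
 */
static inline u32 example_initial_count(u64 sample_period)
{
	return (u32)(-sample_period);
}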
static int sparc_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (test_and_set_bit(idx, cpuc->used_mask))
		return -EAGAIN;

	sparc_pmu_disable_event(cpuc, hwc, idx);

	cpuc->events[idx] = event;
	set_bit(idx, cpuc->active_mask);

	sparc_perf_event_set_period(event, hwc, idx);
	sparc_pmu_enable_event(cpuc, hwc, idx);
	perf_event_update_userpage(event);
	return 0;
}
static u64 sparc_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - 32;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	new_raw_count = read_pmc(idx);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
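/* Illustrative sketch: the shift pair in sparc_perf_event_update()
 * sign-extends the low 32 bits, so a reading taken after the counter
 * wrapped (e.g. prev == 0xfffffff0, cur == 0x00000010) still yields a
 * small positive delta (0x20) rather than a huge negative one.  This
 * helper is for illustration only.
 */
static inline s64 example_counter_delta(u64 prev, u64 cur)
{
	s64 delta = (cur << 32) - (prev << 32);

	return delta >> 32;
}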
static void sparc_pmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	clear_bit(idx, cpuc->active_mask);
	sparc_pmu_disable_event(cpuc, hwc, idx);

	sparc_perf_event_update(event, hwc, idx);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}
static void sparc_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	sparc_perf_event_update(event, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	sparc_pmu_enable_event(cpuc, hwc, hwc->idx);
}
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

static void perf_stop_nmi_watchdog(void *unused)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	stop_nmi_watchdog(NULL);
	cpuc->pcr = pcr_ops->read();
}
void perf_event_grab_pmc(void)
{
	if (atomic_inc_not_zero(&active_events))
		return;

	mutex_lock(&pmc_grab_mutex);
	if (atomic_read(&active_events) == 0) {
		if (atomic_read(&nmi_active) > 0) {
			on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
			BUG_ON(atomic_read(&nmi_active) != 0);
		}
		atomic_inc(&active_events);
	}
	mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
		if (atomic_read(&nmi_active) == 0)
			on_each_cpu(start_nmi_watchdog, NULL, 1);
		mutex_unlock(&pmc_grab_mutex);
	}
}
static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct perf_event_map *pmap;

	if (!sparc_pmu->cache_map)
		return ERR_PTR(-ENOENT);

	cache_type = (config >>  0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >>  8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

	if (pmap->encoding == CACHE_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	if (pmap->encoding == CACHE_OP_NONSENSE)
		return ERR_PTR(-EINVAL);

	return pmap;
}
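/* Illustrative sketch of how a PERF_TYPE_HW_CACHE config word is laid
 * out, matching the decoding above: cache type in bits 0-7, operation
 * in bits 8-15, result in bits 16-23.  The helper name is made up and
 * not part of the driver.
 */
static inline u64 example_cache_config(unsigned int type, unsigned int op,
				       unsigned int result)
{
	return (u64)type | ((u64)op << 8) | ((u64)result << 16);
}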
static void hw_perf_event_destroy(struct perf_event *event)
{
	perf_event_release_pmc();
}
/* Make sure all events can be scheduled into the hardware at
 * the same time.  This is simplified by the fact that we only
 * need to support 2 simultaneous HW events.
 */
static int sparc_check_constraints(unsigned long *events, int n_ev)
{
	if (n_ev <= perf_max_events) {
		u8 msk1, msk2;
		u16 dummy;

		if (n_ev == 1)
			return 0;
		BUG_ON(n_ev != 2);
		perf_event_decode(events[0], &dummy, &msk1);
		perf_event_decode(events[1], &dummy, &msk2);

		/* If both events can go on any counter, OK. */
		if (msk1 == (PIC_UPPER | PIC_LOWER) &&
		    msk2 == (PIC_UPPER | PIC_LOWER))
			return 0;

		/* If one event is limited to a specific counter,
		 * and the other can go on both, OK.
		 */
		if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
		    msk2 == (PIC_UPPER | PIC_LOWER))
			return 0;
		if ((msk2 == PIC_UPPER || msk2 == PIC_LOWER) &&
		    msk1 == (PIC_UPPER | PIC_LOWER))
			return 0;

		/* If the events are fixed to different counters, OK. */
		if ((msk1 == PIC_UPPER && msk2 == PIC_LOWER) ||
		    (msk1 == PIC_LOWER && msk2 == PIC_UPPER))
			return 0;

		/* Otherwise, there is a conflict. */
	}

	return -1;
}
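/* Worked example of the rules above (illustration only): msk1 ==
 * PIC_UPPER with msk2 == PIC_LOWER schedules fine, as does one fixed
 * mask paired with PIC_UPPER | PIC_LOWER; but msk1 == msk2 ==
 * PIC_UPPER is a conflict, since both events would need the same
 * counter.
 */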
static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
{
	int eu = 0, ek = 0, eh = 0;
	struct perf_event *event;
	int i, n, first;

	n = n_prev + n_new;
	if (n <= 1)
		return 0;

	first = 1;
	for (i = 0; i < n; i++) {
		event = evts[i];
		if (first) {
			eu = event->attr.exclude_user;
			ek = event->attr.exclude_kernel;
			eh = event->attr.exclude_hv;
			first = 0;
		} else if (event->attr.exclude_user != eu ||
			   event->attr.exclude_kernel != ek ||
			   event->attr.exclude_hv != eh) {
			return -EAGAIN;
		}
	}

	return 0;
}
static int collect_events(struct perf_event *group, int max_count,
			  struct perf_event *evts[], unsigned long *events)
{
	struct perf_event *event;
	int n = 0;

	if (!is_software_event(group)) {
		if (n >= max_count)
			return -1;
		evts[n] = group;
		events[n++] = group->hw.event_base;
	}
	list_for_each_entry(event, &group->sibling_list, group_entry) {
		if (!is_software_event(event) &&
		    event->state != PERF_EVENT_STATE_OFF) {
			if (n >= max_count)
				return -1;
			evts[n] = event;
			events[n++] = event->hw.event_base;
		}
	}
	return n;
}
static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct perf_event *evts[MAX_HWEVENTS];
	struct hw_perf_event *hwc = &event->hw;
	unsigned long events[MAX_HWEVENTS];
	const struct perf_event_map *pmap;
	u64 enc;
	int n;

	if (atomic_read(&nmi_active) < 0)
		return -ENODEV;

	if (attr->type == PERF_TYPE_HARDWARE) {
		if (attr->config >= sparc_pmu->max_events)
			return -EINVAL;
		pmap = sparc_pmu->event_map(attr->config);
	} else if (attr->type == PERF_TYPE_HW_CACHE) {
		pmap = sparc_map_cache_event(attr->config);
		if (IS_ERR(pmap))
			return PTR_ERR(pmap);
	} else
		return -EOPNOTSUPP;

	/* We save the enable bits in the config_base.  So to
	 * turn off sampling just write 'config', and to enable
	 * things write 'config | config_base'.
	 */
	hwc->config_base = sparc_pmu->irq_bit;
	if (!attr->exclude_user)
		hwc->config_base |= PCR_UTRACE;
	if (!attr->exclude_kernel)
		hwc->config_base |= PCR_STRACE;
	if (!attr->exclude_hv)
		hwc->config_base |= sparc_pmu->hv_bit;

	hwc->event_base = perf_event_encode(pmap);

	enc = pmap->encoding;

	n = 0;
	if (event->group_leader != event) {
		n = collect_events(event->group_leader,
				   perf_max_events - 1,
				   evts, events);
		if (n < 0)
			return -EINVAL;
	}
	events[n] = hwc->event_base;
	evts[n] = event;

	if (check_excludes(evts, n, 1))
		return -EINVAL;

	if (sparc_check_constraints(events, n + 1))
		return -EINVAL;

	/* Try to do all error checking before this point, as unwinding
	 * state after grabbing the PMC is difficult.
	 */
	perf_event_grab_pmc();
	event->destroy = hw_perf_event_destroy;

	if (!hwc->sample_period) {
		hwc->sample_period = MAX_PERIOD;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	}

	if (pmap->pic_mask & PIC_UPPER) {
		hwc->idx = PIC_UPPER_INDEX;
		enc <<= sparc_pmu->upper_shift;
	} else {
		hwc->idx = PIC_LOWER_INDEX;
		enc <<= sparc_pmu->lower_shift;
	}

	hwc->config |= enc;

	return 0;
}
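/* Illustrative sketch of the config/config_base split described in
 * the comment above: sampling for an event is stopped by programming
 * just 'config' into its PCR field, and (re)started by programming
 * 'config | config_base'.  The helper below is illustration only and
 * not part of the driver.
 */
static inline u64 example_pcr_bits(u64 config, u64 config_base, bool enable)
{
	return enable ? (config | config_base) : config;
}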
static const struct pmu pmu = {
	.enable		= sparc_pmu_enable,
	.disable	= sparc_pmu_disable,
	.read		= sparc_pmu_read,
	.unthrottle	= sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
	int err = __hw_perf_event_init(event);

	if (err)
		return ERR_PTR(err);
	return &pmu;
}
void perf_event_print_debug(void)
{
	unsigned long flags;
	u64 pcr, pic;
	int cpu;

	if (!sparc_pmu)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();

	pcr = pcr_ops->read();
	read_pic(pic);

	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
		cpu, pcr, pic);

	local_irq_restore(flags);
}
static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
					    unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx < MAX_HWEVENTS; idx++) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		hwc = &event->hw;
		val = sparc_perf_event_update(event, hwc, idx);
		if (val & (1ULL << 31))
			continue;

		data.period = event->hw.last_period;
		if (!sparc_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			sparc_pmu_disable_event(cpuc, hwc, idx);
	}

	return NOTIFY_STOP;
}
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call		= perf_event_nmi_handler,
};
static bool __init supported_pmu(void)
{
	if (!strcmp(sparc_pmu_type, "ultra3") ||
	    !strcmp(sparc_pmu_type, "ultra3+") ||
	    !strcmp(sparc_pmu_type, "ultra3i") ||
	    !strcmp(sparc_pmu_type, "ultra4+")) {
		sparc_pmu = &ultra3_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara")) {
		sparc_pmu = &niagara1_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara2")) {
		sparc_pmu = &niagara2_pmu;
		return true;
	}
	return false;
}
void __init init_hw_perf_events(void)
{
	pr_info("Performance events: ");

	if (!supported_pmu()) {
		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
		return;
	}

	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

	/* All sparc64 PMUs currently have 2 events.  But this simple
	 * driver only supports one active event at a time.
	 */
	perf_max_events = 1;

	register_die_notifier(&perf_event_nmi_notifier);
}