// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008-2017 Andes Technology Corporation
 *
 * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
 * 2010 (c) MontaVista Software, LLC.
 */

#include <linux/perf_event.h>
#include <linux/bitmap.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/pm_runtime.h>
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/sched/clock.h>
#include <linux/percpu-defs.h>

#include <asm/irq_regs.h>
#include <asm/nds32.h>
#include <asm/stacktrace.h>
#include <asm/perf_event.h>
#include <nds32_intrinsic.h>
/* Set at runtime when we know what CPU type we are. */
static struct nds32_pmu *cpu_pmu;

static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);

static struct platform_device_id cpu_pmu_plat_device_ids[] = {
	{.name = "nds32-pfm"},
	{},
};
static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
				     [PERF_COUNT_HW_CACHE_MAX]
				     [PERF_COUNT_HW_CACHE_OP_MAX]
				     [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];

	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ret;
}
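/*
 * Worked example of the cache-event encoding handled above (values are
 * illustrative only): attr.config packs the cache type in bits [7:0], the
 * operation in bits [15:8] and the result in bits [23:16]. A config of
 * 0x10000 therefore decodes to cache_type = PERF_COUNT_HW_CACHE_L1D (0),
 * cache_op = PERF_COUNT_HW_CACHE_OP_READ (0) and cache_result =
 * PERF_COUNT_HW_CACHE_RESULT_MISS (1), i.e. L1 data-cache read misses,
 * provided the cache_map table marks that combination as supported.
 */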
static int
nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
		       u64 config)
{
	int mapping;

	if (config >= PERF_COUNT_HW_MAX)
		return -ENOENT;

	mapping = (*event_map)[config];
	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}
static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
{
	int ev_type = (int)(config & raw_event_mask);
	int idx = config >> 8;

	switch (idx) {
	case 0:
		ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
		if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
			return -ENOENT;
		break;
	case 1:
		ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
		if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
			return -ENOENT;
		break;
	case 2:
		ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
		if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
			return -ENOENT;
		break;
	default:
		return -ENOENT;
	}

	return ev_type;
}
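/*
 * Sketch of the raw-event encoding handled above (assuming the usual "rXXXX"
 * raw-event syntax of the perf tool; the event numbers themselves are
 * hardware specific): bits [7:0] of attr.config select an event within one
 * counter's selection space and bits [15:8] select which of the three spaces
 * to use. For example, a raw config of 0x104 picks idx 1 and is remapped to
 * PFM_OFFSET_MAGIC_1 + 0x04 before the range check.
 *
 *   $ perf stat -e r104 ./program		(illustrative only)
 */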
static int
nds32_pmu_map_event(struct perf_event *event,
		    const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
		    const unsigned int (*cache_map)
		    [PERF_COUNT_HW_CACHE_MAX]
		    [PERF_COUNT_HW_CACHE_OP_MAX]
		    [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
{
	u64 config = event->attr.config;

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		return nds32_pmu_map_hw_event(event_map, config);
	case PERF_TYPE_HW_CACHE:
		return nds32_pmu_map_cache_event(cache_map, config);
	case PERF_TYPE_RAW:
		return nds32_pmu_map_raw_event(raw_event_mask, config);
	}

	return -ENOENT;
}
static int nds32_spav3_map_event(struct perf_event *event)
{
	return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
				   &nds32_pfm_perf_cache_map,
				   SOFTWARE_EVENT_MASK);
}
static inline u32 nds32_pfm_getreset_flags(void)
{
	/* Read overflow status */
	u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);

	/* Write the overflow bits back to clear the overflow status */
	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];

	__nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);

	return val;
}
static inline int nds32_pfm_has_overflowed(u32 pfm)
{
	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];

	return pfm & ov_flag;
}
static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
{
	u32 mask = 0;

	switch (idx) {
	case 0:
		mask = PFM_CTL_OVF[0];
		break;
	case 1:
		mask = PFM_CTL_OVF[1];
		break;
	case 2:
		mask = PFM_CTL_OVF[2];
		break;
	default:
		pr_err("%s index wrong\n", __func__);
		break;
	}
	return pfm & mask;
}
/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the event disabled in hw:
 */
int nds32_pmu_event_set_period(struct perf_event *event)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
	if (unlikely(period != hwc->last_period))
		left = period - (hwc->last_period - left);

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)nds32_pmu->max_period)
		left = nds32_pmu->max_period;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future "deltas":
	 */
	local64_set(&hwc->prev_count, (u64)(-left));

	nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);

	perf_event_update_userpage(event);

	return ret;
}
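/*
 * Worked example for the period programming above (numbers are illustrative,
 * assuming the 32-bit max_period configured in nds32_pmu_init()): with
 * left == 1000, prev_count is set to (u64)-1000 and the counter is written
 * with 0xFFFFFC18 (== -1000 & 0xFFFFFFFF), so the hardware counts up, wraps
 * past 0xFFFFFFFF and raises the overflow interrupt after exactly 1000 more
 * events.
 */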
static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
{
	u32 pfm;
	struct perf_sample_data data;
	struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
	struct pt_regs *regs;
	int idx;

	/*
	 * Get and reset the IRQ flags
	 */
	pfm = nds32_pfm_getreset_flags();

	/*
	 * Did an overflow occur?
	 */
	if (!nds32_pfm_has_overflowed(pfm))
		return IRQ_NONE;

	/*
	 * Handle the counter(s) overflow(s)
	 */
	regs = get_irq_regs();

	nds32_pmu_stop(cpu_pmu);
	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		/* Ignore if we don't have an event. */
		if (!event)
			continue;

		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!nds32_pfm_counter_has_overflowed(pfm, idx))
			continue;

		hwc = &event->hw;
		nds32_pmu_event_update(event);
		perf_sample_data_init(&data, 0, hwc->last_period);
		if (!nds32_pmu_event_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			cpu_pmu->disable(event);
	}
	nds32_pmu_start(cpu_pmu);
	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}
static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
{
	return ((idx >= 0) && (idx < cpu_pmu->num_events));
}
static inline int nds32_pfm_disable_counter(int idx)
{
	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 mask = 0;

	mask = PFM_CTL_EN[idx];
	val &= ~mask;
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
	return idx;
}
/*
 * Add an event filter to a given event.
 */
static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
				      struct perf_event_attr *attr)
{
	unsigned long config_base = 0;
	int idx = event->idx;
	unsigned long no_kernel_tracing = 0;
	unsigned long no_user_tracing = 0;

	/* If index is -1, do not do anything */
	if (idx == -1)
		return 0;

	no_kernel_tracing = PFM_CTL_KS[idx];
	no_user_tracing = PFM_CTL_KU[idx];
	/*
	 * Default: enable both kernel and user mode tracing.
	 */
	if (attr->exclude_user)
		config_base |= no_user_tracing;

	if (attr->exclude_kernel)
		config_base |= no_kernel_tracing;

	/*
	 * Install the filter into config_base as this is used to
	 * construct the event type.
	 */
	event->config_base |= config_base;
	return 0;
}
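/*
 * Usage sketch for the filter above (hedged; the modifier handling lives in
 * the perf tool, not in this driver): an event opened with the ":u" modifier
 * has attr.exclude_kernel set, so PFM_CTL_KS[idx] is OR-ed into config_base
 * and the counter stops counting in kernel mode; ":k" likewise sets
 * attr.exclude_user and applies PFM_CTL_KU[idx].
 *
 *   $ perf stat -e instructions:u ./program
 */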
static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
{
	u32 offset = 0;
	u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 ev_mask = 0;
	u32 no_kernel_mask = 0;
	u32 no_user_mask = 0;
	u32 val;

	offset = PFM_CTL_OFFSEL[idx];
	/* Clear previous mode selection, and write new one */
	no_kernel_mask = PFM_CTL_KS[idx];
	no_user_mask = PFM_CTL_KU[idx];
	ori_val &= ~no_kernel_mask;
	ori_val &= ~no_user_mask;
	if (evnum & no_kernel_mask)
		ori_val |= no_kernel_mask;

	if (evnum & no_user_mask)
		ori_val |= no_user_mask;

	/* Clear previous event selection */
	ev_mask = PFM_CTL_SEL[idx];
	ori_val &= ~ev_mask;
	evnum &= SOFTWARE_EVENT_MASK;

	/* undo the linear mapping */
	evnum = get_converted_evet_hw_num(evnum);
	val = ori_val | (evnum << offset);
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
}
static inline int nds32_pfm_enable_counter(int idx)
{
	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 mask = 0;

	mask = PFM_CTL_EN[idx];
	val |= mask;
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
	return idx;
}
static inline int nds32_pfm_enable_intens(int idx)
{
	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 mask = 0;

	mask = PFM_CTL_IE[idx];
	val |= mask;
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
	return idx;
}
static inline int nds32_pfm_disable_intens(int idx)
{
	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	u32 mask = 0;

	mask = PFM_CTL_IE[idx];
	val &= ~mask;
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
	return idx;
}
static int event_requires_mode_exclusion(struct perf_event_attr *attr)
{
	/* Other modes NDS32 does not support */
	return attr->exclude_user || attr->exclude_kernel;
}
static void nds32_pmu_enable_event(struct perf_event *event)
{
	unsigned long flags;
	unsigned int evnum = 0;
	struct hw_perf_event *hwc = &event->hw;
	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
	int idx = hwc->idx;

	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
		pr_err("CPU enabling wrong pfm counter IRQ enable\n");
		return;
	}

	/*
	 * Enable counter and interrupt, and set the counter to count
	 * the event that we're interested in.
	 */
	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/*
	 * Disable counter
	 */
	nds32_pfm_disable_counter(idx);

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 */
	if ((!cpu_pmu->set_event_filter ||
	     cpu_pmu->set_event_filter(hwc, &event->attr)) &&
	     event_requires_mode_exclusion(&event->attr)) {
		pr_notice
		("NDS32 performance counters do not support mode exclusion\n");
		hwc->config_base = 0;
	}
	/* Write event */
	evnum = hwc->config_base;
	nds32_pfm_write_evtsel(idx, evnum);

	/*
	 * Enable interrupt for this counter
	 */
	nds32_pfm_enable_intens(idx);

	/*
	 * Enable counter
	 */
	nds32_pfm_enable_counter(idx);

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void nds32_pmu_disable_event(struct perf_event *event)
{
	unsigned long flags;
	struct hw_perf_event *hwc = &event->hw;
	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
	int idx = hwc->idx;

	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
		pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
		return;
	}

	/*
	 * Disable counter and interrupt
	 */
	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/*
	 * Disable counter
	 */
	nds32_pfm_disable_counter(idx);

	/*
	 * Disable interrupt for this counter
	 */
	nds32_pfm_disable_intens(idx);

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static inline u32 nds32_pmu_read_counter(struct perf_event *event)
{
	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	u32 count = 0;

	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
		pr_err("CPU reading wrong counter %d\n", idx);
	} else {
		switch (idx) {
		case 0:
			count = __nds32__mfsr(NDS32_SR_PFMC0);
			break;
		case 1:
			count = __nds32__mfsr(NDS32_SR_PFMC1);
			break;
		case 2:
			count = __nds32__mfsr(NDS32_SR_PFMC2);
			break;
		default:
			pr_err
			("%s: CPU has no performance counters %d\n",
			 __func__, idx);
		}
	}
	return count;
}
static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
{
	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
		pr_err("CPU writing wrong counter %d\n", idx);
	} else {
		switch (idx) {
		case 0:
			__nds32__mtsr_isb(value, NDS32_SR_PFMC0);
			break;
		case 1:
			__nds32__mtsr_isb(value, NDS32_SR_PFMC1);
			break;
		case 2:
			__nds32__mtsr_isb(value, NDS32_SR_PFMC2);
			break;
		default:
			pr_err
			("%s: CPU has no performance counters %d\n",
			 __func__, idx);
		}
	}
}
static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
				   struct perf_event *event)
{
	int idx;
	struct hw_perf_event *hwc = &event->hw;
	/*
	 * Current implementation maps cycles, instruction count and cache-miss
	 * events to specific counters.
	 * However, more than one of the 3 counters is able to count these
	 * events.
	 *
	 * SOFTWARE_EVENT_MASK is the mask for getting the event number.
	 * This is defined by Jia-Rung; you can change the policies.
	 * However, do not exceed 8 bits. This is hardware specific.
	 * The last number is SPAV3_2_SEL_LAST.
	 */
	unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;

	idx = get_converted_event_idx(evtype);
	/*
	 * Try to get the counter for the corresponding event.
	 */
	if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
		if (!test_and_set_bit(idx, cpuc->used_mask))
			return idx;
		if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
			return NDS32_IDX_COUNTER0;
		if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
			return NDS32_IDX_COUNTER1;
	} else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
		if (!test_and_set_bit(idx, cpuc->used_mask))
			return idx;
		else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
			return NDS32_IDX_COUNTER1;
		else if (!test_and_set_bit
			 (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
			return NDS32_IDX_CYCLE_COUNTER;
	} else {
		if (!test_and_set_bit(idx, cpuc->used_mask))
			return idx;
	}
	return -EAGAIN;
}
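/*
 * Allocation example for the index selection above (illustrative): a
 * total-cycles event first tries the counter returned by
 * get_converted_event_idx(); if that bit is already set in used_mask it
 * falls back to counter 0, then counter 1, and the caller only sees
 * -EAGAIN once every suitable counter is busy.
 */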
static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
{
	unsigned long flags;
	u32 val;
	struct pmu_hw_events *events = cpu_pmu->get_hw_events();

	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/* Enable all counters; the NDS32 PFM has 3 counters */
	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
{
	unsigned long flags;
	u32 val;
	struct pmu_hw_events *events = cpu_pmu->get_hw_events();

	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/* Disable all counters; the NDS32 PFM has 3 counters */
	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
	val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void nds32_pmu_reset(void *info)
{
	u32 val = 0;

	val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
	__nds32__mtsr(val, NDS32_SR_PFM_CTL);
	__nds32__mtsr(0, NDS32_SR_PFM_CTL);
	__nds32__mtsr(0, NDS32_SR_PFMC0);
	__nds32__mtsr(0, NDS32_SR_PFMC1);
	__nds32__mtsr(0, NDS32_SR_PFMC2);
}
static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
{
	cpu_pmu->handle_irq = nds32_pmu_handle_irq;
	cpu_pmu->enable = nds32_pmu_enable_event;
	cpu_pmu->disable = nds32_pmu_disable_event;
	cpu_pmu->read_counter = nds32_pmu_read_counter;
	cpu_pmu->write_counter = nds32_pmu_write_counter;
	cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
	cpu_pmu->start = nds32_pmu_start;
	cpu_pmu->stop = nds32_pmu_stop;
	cpu_pmu->reset = nds32_pmu_reset;
	cpu_pmu->max_period = 0xFFFFFFFF;	/* Maximum counts */
}
static u32 nds32_read_num_pfm_events(void)
{
	/* The NDS32 SPAv3 PMU supports 3 counters */
	return 3;
}
static int device_pmu_init(struct nds32_pmu *cpu_pmu)
{
	nds32_pmu_init(cpu_pmu);
	/*
	 * This name should be a device-specific name, whatever you like :)
	 * I think "PMU" will be a good generic name.
	 */
	cpu_pmu->name = "nds32v3-pmu";
	cpu_pmu->map_event = nds32_spav3_map_event;
	cpu_pmu->num_events = nds32_read_num_pfm_events();
	cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
	return 0;
}
/*
 * CPU PMU identification and probing.
 */
static int probe_current_pmu(struct nds32_pmu *pmu)
{
	/*
	 * If there are various CPU types, each with its own PMU, initialize
	 * with the corresponding one.
	 */
	device_pmu_init(pmu);
	return 0;
}
static void nds32_pmu_enable(struct pmu *pmu)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
	int enabled = bitmap_weight(hw_events->used_mask,
				    nds32_pmu->num_events);

	if (enabled)
		nds32_pmu->start(nds32_pmu);
}
static void nds32_pmu_disable(struct pmu *pmu)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);

	nds32_pmu->stop(nds32_pmu);
}
static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
{
	nds32_pmu->free_irq(nds32_pmu);
	pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
}
static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
{
	struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
	int ret;
	u64 start_clock, finish_clock;

	start_clock = local_clock();
	ret = nds32_pmu->handle_irq(irq, dev);
	finish_clock = local_clock();

	perf_sample_event_took(finish_clock - start_clock);
	return ret;
}
static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
{
	int err;
	struct platform_device *pmu_device = nds32_pmu->plat_device;

	if (!pmu_device)
		return -ENODEV;

	pm_runtime_get_sync(&pmu_device->dev);
	err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
	if (err) {
		nds32_pmu_release_hardware(nds32_pmu);
		return err;
	}

	return 0;
}
static int
validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
	       struct perf_event *event)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);

	if (is_software_event(event))
		return 1;

	if (event->pmu != pmu)
		return 0;

	if (event->state < PERF_EVENT_STATE_OFF)
		return 1;

	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
		return 1;

	return nds32_pmu->get_event_idx(hw_events, event) >= 0;
}
static int validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct pmu_hw_events fake_pmu;
	DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS);
	/*
	 * Initialize the fake PMU. We only need to populate the
	 * used_mask for the purposes of validation.
	 */
	memset(fake_used_mask, 0, sizeof(fake_used_mask));

	if (!validate_event(event->pmu, &fake_pmu, leader))
		return -EINVAL;

	for_each_sibling_event(sibling, leader) {
		if (!validate_event(event->pmu, &fake_pmu, sibling))
			return -EINVAL;
	}

	if (!validate_event(event->pmu, &fake_pmu, event))
		return -EINVAL;

	return 0;
}
static int __hw_perf_event_init(struct perf_event *event)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int mapping;

	mapping = nds32_pmu->map_event(event);

	if (mapping < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapping;
	}

	/*
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put it
	 * yet. For SMP systems, each core has its own PMU so we can't do any
	 * clever allocation or constraints checking at this point.
	 */
	hwc->idx = -1;
	hwc->config_base = 0;

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 */
	if ((!nds32_pmu->set_event_filter ||
	     nds32_pmu->set_event_filter(hwc, &event->attr)) &&
	     event_requires_mode_exclusion(&event->attr)) {
		pr_notice
		("NDS performance counters do not support mode exclusion\n");
		return -EOPNOTSUPP;
	}

	/*
	 * Store the event encoding into the config_base field.
	 */
	hwc->config_base |= (unsigned long)mapping;

	if (!hwc->sample_period) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		hwc->sample_period = nds32_pmu->max_period >> 1;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	if (event->group_leader != event) {
		if (validate_group(event) != 0)
			return -EINVAL;
	}

	return 0;
}
*event
)
859 struct nds32_pmu
*nds32_pmu
= to_nds32_pmu(event
->pmu
);
861 atomic_t
*active_events
= &nds32_pmu
->active_events
;
863 /* does not support taken branch sampling */
864 if (has_branch_stack(event
))
867 if (nds32_pmu
->map_event(event
) == -ENOENT
)
870 if (!atomic_inc_not_zero(active_events
)) {
871 if (atomic_read(active_events
) == 0) {
872 /* Register irq handler */
873 err
= nds32_pmu_reserve_hardware(nds32_pmu
);
877 atomic_inc(active_events
);
883 err
= __hw_perf_event_init(event
);
static void nds32_start(struct perf_event *event, int flags)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	/*
	 * The NDS pmu always has to reprogram the period, so ignore
	 * PERF_EF_RELOAD, see the comment below.
	 */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;
	/* Set the period for the event. */
	nds32_pmu_event_set_period(event);

	nds32_pmu->enable(event);
}
static int nds32_pmu_add(struct perf_event *event, int flags)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;

	perf_pmu_disable(event->pmu);

	/* If we don't have a space for the counter then finish early. */
	idx = nds32_pmu->get_event_idx(hw_events, event);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
	 */
	event->hw.idx = idx;
	nds32_pmu->disable(event);
	hw_events->events[idx] = event;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		nds32_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}
u64 nds32_pmu_event_update(struct perf_event *event)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 delta, prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = nds32_pmu->read_counter(event);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count) {
		goto again;
	}
	/*
	 * Whether it overflowed or not, an "unsigned subtraction"
	 * will always produce the correct delta.
	 */
	delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
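/*
 * Worked example for the delta computation above (illustrative values,
 * 32-bit counter width as configured by max_period): with
 * prev_raw_count == 0xFFFFFFF0 and new_raw_count == 0x10 after a wrap,
 * (0x10 - 0xFFFFFFF0) & 0xFFFFFFFF == 0x20, so the 32 events that occurred
 * across the overflow are still accumulated correctly.
 */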
static void nds32_stop(struct perf_event *event, int flags)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	/*
	 * The NDS pmu always has to update the counter, so ignore
	 * PERF_EF_UPDATE, see comments in nds32_start().
	 */
	if (!(hwc->state & PERF_HES_STOPPED)) {
		nds32_pmu->disable(event);
		nds32_pmu_event_update(event);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}
static void nds32_pmu_del(struct perf_event *event, int flags)
{
	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	nds32_stop(event, PERF_EF_UPDATE);
	hw_events->events[idx] = NULL;
	clear_bit(idx, hw_events->used_mask);

	perf_event_update_userpage(event);
}
static void nds32_pmu_read(struct perf_event *event)
{
	nds32_pmu_event_update(event);
}
/* Please refer to SPAv3 for more hardware specific details */
PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *nds32_arch_formats_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group nds32_pmu_format_group = {
	.name = "format",
	.attrs = nds32_arch_formats_attr,
};

static ssize_t nds32_pmu_cpumask_show(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	return 0;
}

static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);

static struct attribute *nds32_pmu_common_attrs[] = {
	&dev_attr_cpus.attr,
	NULL,
};

static struct attribute_group nds32_pmu_common_group = {
	.attrs = nds32_pmu_common_attrs,
};

static const struct attribute_group *nds32_pmu_attr_groups[] = {
	&nds32_pmu_format_group,
	&nds32_pmu_common_group,
	NULL,
};
static void nds32_init(struct nds32_pmu *nds32_pmu)
{
	atomic_set(&nds32_pmu->active_events, 0);

	nds32_pmu->pmu = (struct pmu) {
		.pmu_enable = nds32_pmu_enable,
		.pmu_disable = nds32_pmu_disable,
		.attr_groups = nds32_pmu_attr_groups,
		.event_init = nds32_pmu_event_init,
		.add = nds32_pmu_add,
		.del = nds32_pmu_del,
		.start = nds32_start,
		.stop = nds32_stop,
		.read = nds32_pmu_read,
	};
}
int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
{
	nds32_init(nds32_pmu);
	pm_runtime_enable(&nds32_pmu->plat_device->dev);
	pr_info("enabled with %s PMU driver, %d counters available\n",
		nds32_pmu->name, nds32_pmu->num_events);
	return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
}
static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
{
	return this_cpu_ptr(&cpu_hw_events);
}
static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
{
	int err, irq, irqs;
	struct platform_device *pmu_device = cpu_pmu->plat_device;

	if (!pmu_device)
		return -ENODEV;

	irqs = min(pmu_device->num_resources, num_possible_cpus());
	if (irqs < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	irq = platform_get_irq(pmu_device, 0);
	err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
			  cpu_pmu);
	if (err) {
		pr_err("unable to request IRQ%d for NDS PMU counters\n",
		       irq);
		return err;
	}
	return 0;
}
static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
{
	int irq;
	struct platform_device *pmu_device = cpu_pmu->plat_device;

	irq = platform_get_irq(pmu_device, 0);
	if (irq >= 0)
		free_irq(irq, cpu_pmu);
}
static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);

		raw_spin_lock_init(&events->pmu_lock);
	}

	cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
	cpu_pmu->request_irq = cpu_pmu_request_irq;
	cpu_pmu->free_irq = cpu_pmu_free_irq;

	/* Ensure the PMU has sane values out of reset. */
	if (cpu_pmu->reset)
		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
}
static const struct of_device_id cpu_pmu_of_device_ids[] = {
	{.compatible = "andestech,nds32v3-pmu",
	 .data = device_pmu_init},
	{},
};
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
	const struct of_device_id *of_id;
	int (*init_fn)(struct nds32_pmu *nds32_pmu);
	struct device_node *node = pdev->dev.of_node;
	struct nds32_pmu *pmu;
	int ret = -ENODEV;

	if (cpu_pmu) {
		pr_notice("[perf] attempt to register multiple PMU devices!\n");
		return -ENOSPC;
	}

	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
	if (!pmu)
		return -ENOMEM;

	of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
	if (node && of_id) {
		init_fn = of_id->data;
		ret = init_fn(pmu);
	} else {
		ret = probe_current_pmu(pmu);
	}

	if (ret) {
		pr_notice("[perf] failed to probe PMU!\n");
		goto out_free;
	}

	cpu_pmu = pmu;
	cpu_pmu->plat_device = pdev;
	cpu_pmu_init(cpu_pmu);
	ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);

	if (!ret)
		return 0;

out_free:
	pr_notice("[perf] failed to register PMU devices!\n");
	kfree(pmu);
	return ret;
}
static struct platform_driver cpu_pmu_driver = {
	.driver = {
		   .name = "nds32-pfm",
		   .of_match_table = cpu_pmu_of_device_ids,
		   },
	.probe = cpu_pmu_device_probe,
	.id_table = cpu_pmu_plat_device_ids,
};

static int __init register_pmu_driver(void)
{
	int err = 0;

	err = platform_driver_register(&cpu_pmu_driver);
	if (err)
		pr_notice("[perf] PMU initialization failed\n");
	else
		pr_notice("[perf] PMU initialization done\n");

	return err;
}

device_initcall(register_pmu_driver);
/*
 * References: arch/nds32/kernel/traps.c:__dump()
 * You will need to know the NDS ABI first.
 */
static int unwind_frame_kernel(struct stackframe *frame)
{
	int graph = 0;
#ifdef CONFIG_FRAME_POINTER
	/* 0x3 means misalignment */
	if (!kstack_end((void *)frame->fp) &&
	    !((unsigned long)frame->fp & 0x3) &&
	    ((unsigned long)frame->fp >= TASK_SIZE)) {
		/*
		 * The array index is based on the ABI; the graph below
		 * illustrates the reason.
		 * Function call procedure: "smw" and "lmw" will always
		 * update SP and FP for you automatically.
		 *
		 * Stack                                 Relative Address
		 * |LP| <-- SP(before smw) <-- FP(after smw)    -1
		 * |FP|                                         -2
		 * |  | <-- SP(after smw)                       -3
		 */
		frame->lp = ((unsigned long *)frame->fp)[-1];
		frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
		/* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
		if (__kernel_text_address(frame->lp))
			frame->lp = ftrace_graph_ret_addr
					(NULL, &graph, frame->lp, NULL);

		return 0;
	}
	return -EPERM;
#else
	/*
	 * You can refer to arch/nds32/kernel/traps.c:__dump()
	 * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
	 * And, the "sp" is not always correct.
	 *
	 * Stack                                 Relative Address
	 * |LP| <-- SP(before smw)                      -1
	 * |  | <-- SP(after smw)                       -2
	 */
	if (!kstack_end((void *)frame->sp)) {
		frame->lp = ((unsigned long *)frame->sp)[1];
		/* TODO: How to deal with the case where the value in the
		 * first "sp" is not correct?
		 */
		if (__kernel_text_address(frame->lp))
			frame->lp = ftrace_graph_ret_addr
					(tsk, &graph, frame->lp, NULL);

		frame->sp = ((unsigned long *)frame->sp) + 1;

		return 0;
	}
	return -EPERM;
#endif
}
static void
walk_stackframe(struct stackframe *frame,
		int (*fn_record)(struct stackframe *, void *),
		void *data)
{
	while (1) {
		int ret;

		if (fn_record(frame, data))
			break;

		ret = unwind_frame_kernel(frame);
		if (ret < 0)
			break;
	}
}
/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return, so we use
 * the LP.
 */
static int callchain_trace(struct stackframe *fr, void *data)
{
	struct perf_callchain_entry_ctx *entry = data;

	perf_callchain_store(entry, fr->lp);
	return 0;
}
/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static unsigned long
user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
{
	struct frame_tail buftail;
	unsigned long lp = 0;
	unsigned long *user_frame_tail =
		(unsigned long *)(fp - (unsigned long)sizeof(buftail));

	/* Check accessibility of one struct frame_tail beyond */
	if (!access_ok(user_frame_tail, sizeof(buftail)))
		return 0;
	if (__copy_from_user_inatomic
		(&buftail, user_frame_tail, sizeof(buftail)))
		return 0;

	/*
	 * Refer to unwind_frame_kernel() for more illustration
	 */
	lp = buftail.stack_lp;	/* ((unsigned long *)fp)[-1] */
	fp = buftail.stack_fp;	/* ((unsigned long *)fp)[FP_OFFSET] */
	perf_callchain_store(entry, lp);
	return fp;
}
static unsigned long
user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
			unsigned long fp)
{
	struct frame_tail_opt_size buftail;
	unsigned long lp = 0;

	unsigned long *user_frame_tail =
		(unsigned long *)(fp - (unsigned long)sizeof(buftail));

	/* Check accessibility of one struct frame_tail beyond */
	if (!access_ok(user_frame_tail, sizeof(buftail)))
		return 0;
	if (__copy_from_user_inatomic
		(&buftail, user_frame_tail, sizeof(buftail)))
		return 0;

	/*
	 * Refer to unwind_frame_kernel() for more illustration
	 */
	lp = buftail.stack_lp;	/* ((unsigned long *)fp)[-1] */
	fp = buftail.stack_fp;	/* ((unsigned long *)fp)[FP_OFFSET] */

	perf_callchain_store(entry, lp);
	return fp;
}
/*
 * This will be called when the target is in user mode.
 * This function will only be called when we use
 * "PERF_SAMPLE_CALLCHAIN" in
 * kernel/events/core.c:perf_prepare_sample()
 *
 * How to trigger perf_callchain_[user/kernel]:
 * $ perf record -e cpu-clock --call-graph fp ./program
 * $ perf report --call-graph
 */
unsigned long leaf_fp;
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry,
		    struct pt_regs *regs)
{
	unsigned long fp = 0;
	unsigned long gp = 0;
	unsigned long lp = 0;
	unsigned long sp = 0;
	unsigned long *user_frame_tail;

	leaf_fp = 0;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* We don't support guest os callchain now */
		return;
	}

	perf_callchain_store(entry, regs->ipc);
	fp = regs->fp;
	gp = regs->gp;
	lp = regs->lp;
	sp = regs->sp;
	if (entry->nr < PERF_MAX_STACK_DEPTH &&
	    (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
		user_frame_tail =
			(unsigned long *)(fp - (unsigned long)sizeof(fp));

		if (!access_ok(user_frame_tail, sizeof(fp)))
			return;

		if (__copy_from_user_inatomic
			(&leaf_fp, user_frame_tail, sizeof(fp)))
			return;

		if (leaf_fp == lp) {
			/*
			 * Maybe this is a non-leaf function
			 * with optimize for size,
			 * or maybe this is a leaf function
			 * with optimize for size.
			 */
			struct frame_tail buftail;

			user_frame_tail =
				(unsigned long *)(fp -
					(unsigned long)sizeof(buftail));

			if (!access_ok(user_frame_tail, sizeof(buftail)))
				return;

			if (__copy_from_user_inatomic
				(&buftail, user_frame_tail, sizeof(buftail)))
				return;

			if (buftail.stack_fp == gp) {
				/* non-leaf function with optimize
				 * for size condition
				 */
				struct frame_tail_opt_size buftail_opt_size;

				user_frame_tail =
					(unsigned long *)(fp - (unsigned long)
						sizeof(buftail_opt_size));

				if (!access_ok(user_frame_tail,
					       sizeof(buftail_opt_size)))
					return;

				if (__copy_from_user_inatomic
					(&buftail_opt_size, user_frame_tail,
					 sizeof(buftail_opt_size)))
					return;

				perf_callchain_store(entry, lp);
				fp = buftail_opt_size.stack_fp;

				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
				       (unsigned long)fp &&
				       !((unsigned long)fp & 0x7) &&
				       fp > sp) {
					sp = fp;
					fp = user_backtrace_opt_size(entry, fp);
				}
			} else {
				/* this is a function
				 * without optimize for size
				 */
				fp = buftail.stack_fp;
				perf_callchain_store(entry, lp);
				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
				       (unsigned long)fp &&
				       !((unsigned long)fp & 0x7) &&
				       fp > sp) {
					sp = fp;
					fp = user_backtrace(entry, fp);
				}
			}
		} else {
			/* this is a leaf function */
			fp = leaf_fp;
			perf_callchain_store(entry, lp);

			/* previous function callchain */
			while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
			       (unsigned long)fp &&
			       !((unsigned long)fp & 0x7) && fp > sp) {
				sp = fp;
				fp = user_backtrace(entry, fp);
			}
		}
	}
}
/* This will be called when the target is in kernel mode */
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
		      struct pt_regs *regs)
{
	struct stackframe fr;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* We don't support guest os callchain now */
		return;
	}
	fr.fp = regs->fp;
	fr.lp = regs->lp;
	fr.sp = regs->sp;
	walk_stackframe(&fr, callchain_trace, entry);
}
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	/* However, NDS32 does not support virtualization */
	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
		return perf_guest_cbs->get_guest_ip();

	return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
	int misc = 0;

	/* However, NDS32 does not support virtualization */
	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		if (perf_guest_cbs->is_user_mode())
			misc |= PERF_RECORD_MISC_GUEST_USER;
		else
			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (user_mode(regs))
			misc |= PERF_RECORD_MISC_USER;
		else
			misc |= PERF_RECORD_MISC_KERNEL;
	}

	return misc;
}