// SPDX-License-Identifier: GPL-2.0
/*
 * RISC-V performance counter support.
 *
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 *
 * This implementation is based on old RISC-V perf and ARM perf event code
 * which are in turn based on sparc64 and x86 code.
 */

#include <linux/cpumask.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/perf/riscv_pmu.h>
#include <linux/printk.h>
#include <linux/smp.h>
#include <linux/sched_clock.h>

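/*
 * Check whether userspace is allowed to read this event's counter
 * directly: only hardware, hw-cache and raw events that carry
 * PERF_EVENT_FLAG_USER_READ_CNT and are already bound to a counter.
 */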
static bool riscv_perf_user_access(struct perf_event *event)
{
	return ((event->attr.type == PERF_TYPE_HARDWARE) ||
		(event->attr.type == PERF_TYPE_HW_CACHE) ||
		(event->attr.type == PERF_TYPE_RAW)) &&
		!!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) &&
		(event->hw.idx != -1);
}

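/*
 * Fill in the self-monitoring fields of the perf mmap page: whether the
 * counter may be read from userspace and, using the sched_clock
 * parameters, how to convert raw cycle counts into nanoseconds.
 */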
void arch_perf_update_userpage(struct perf_event *event,
			       struct perf_event_mmap_page *userpg, u64 now)
{
	struct clock_read_data *rd;
	unsigned int seq;
	u64 ns;

	userpg->cap_user_time = 0;
	userpg->cap_user_time_zero = 0;
	userpg->cap_user_time_short = 0;
	userpg->cap_user_rdpmc = riscv_perf_user_access(event);

	/*
	 * The counters are 64-bit but the priv spec doesn't mandate all the
	 * bits to be implemented: that's why, counter width can vary based on
	 * the cpu vendor.
	 */
	if (userpg->cap_user_rdpmc)
		userpg->pmc_width = to_riscv_pmu(event->pmu)->ctr_get_width(event->hw.idx) + 1;

	do {
		rd = sched_clock_read_begin(&seq);

		userpg->time_mult = rd->mult;
		userpg->time_shift = rd->shift;
		userpg->time_zero = rd->epoch_ns;
		userpg->time_cycles = rd->epoch_cyc;
		userpg->time_mask = rd->sched_clock_mask;

		/*
		 * Subtract the cycle base, such that software that
		 * doesn't know about cap_user_time_short still 'works'
		 * assuming no wraps.
		 */
		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
		userpg->time_zero -= ns;

	} while (sched_clock_read_retry(seq));

	userpg->time_offset = userpg->time_zero - now;

	/*
	 * time_shift is not expected to be greater than 31 due to
	 * the original published conversion algorithm shifting a
	 * 32-bit value (now specifies a 64-bit value) - refer
	 * perf_event_mmap_page documentation in perf_event.h.
	 */
	if (userpg->time_shift == 32) {
		userpg->time_shift = 31;
		userpg->time_mult >>= 1;
	}

	/*
	 * Internal timekeeping for enabled/running/stopped times
	 * is always computed with the sched_clock.
	 */
	userpg->cap_user_time = 1;
	userpg->cap_user_time_zero = 1;
	userpg->cap_user_time_short = 1;
}

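/*
 * csr_read() only accepts a compile-time constant CSR number, so the
 * nested macros below expand into a switch statement with one case per
 * counter CSR, selected by the runtime value of csr_num.
 */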
static unsigned long csr_read_num(int csr_num)
{
#define switchcase_csr_read(__csr_num, __val)		{\
	case __csr_num:					\
		__val = csr_read(__csr_num);		\
		break; }
#define switchcase_csr_read_2(__csr_num, __val)		{\
	switchcase_csr_read(__csr_num + 0, __val)	\
	switchcase_csr_read(__csr_num + 1, __val)}
#define switchcase_csr_read_4(__csr_num, __val)		{\
	switchcase_csr_read_2(__csr_num + 0, __val)	\
	switchcase_csr_read_2(__csr_num + 2, __val)}
#define switchcase_csr_read_8(__csr_num, __val)		{\
	switchcase_csr_read_4(__csr_num + 0, __val)	\
	switchcase_csr_read_4(__csr_num + 4, __val)}
#define switchcase_csr_read_16(__csr_num, __val)	{\
	switchcase_csr_read_8(__csr_num + 0, __val)	\
	switchcase_csr_read_8(__csr_num + 8, __val)}
#define switchcase_csr_read_32(__csr_num, __val)	{\
	switchcase_csr_read_16(__csr_num + 0, __val)	\
	switchcase_csr_read_16(__csr_num + 16, __val)}

	unsigned long ret = 0;

	switch (csr_num) {
	switchcase_csr_read_32(CSR_CYCLE, ret)
	switchcase_csr_read_32(CSR_CYCLEH, ret)
	default:
		break;
	}

	return ret;
#undef switchcase_csr_read_32
#undef switchcase_csr_read_16
#undef switchcase_csr_read_8
#undef switchcase_csr_read_4
#undef switchcase_csr_read_2
#undef switchcase_csr_read
}

/*
 * Read the CSR of a corresponding counter.
 */
unsigned long riscv_pmu_ctr_read_csr(unsigned long csr)
{
	if (csr < CSR_CYCLE || csr > CSR_HPMCOUNTER31H ||
	    (csr > CSR_HPMCOUNTER31 && csr < CSR_CYCLEH)) {
		pr_err("Invalid performance counter csr %lx\n", csr);
		return -EINVAL;
	}

	return csr_read_num(csr);
}

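/*
 * Return a mask covering the implemented width of the counter backing
 * this event (or of counter 0 if the event is not bound yet).
 */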
u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event)
{
	int cwidth;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->idx == -1)
		/* Handle init case where idx is not initialized yet */
		cwidth = rvpmu->ctr_get_width(0);
	else
		cwidth = rvpmu->ctr_get_width(hwc->idx);

	return GENMASK_ULL(cwidth, 0);
}

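/*
 * Read the hardware counter and accumulate the delta (modulo the
 * implemented counter width) into the event count and period_left.
 */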
u64 riscv_pmu_event_update(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;
	unsigned long cmask;
	u64 oldval, delta;

	if (!rvpmu->ctr_read || (hwc->state & PERF_HES_UPTODATE))
		return 0;

	cmask = riscv_pmu_ctr_get_width_mask(event);

	do {
		prev_raw_count = local64_read(&hwc->prev_count);
		new_raw_count = rvpmu->ctr_read(event);
		oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					 new_raw_count);
	} while (oldval != prev_raw_count);

	delta = (new_raw_count - prev_raw_count) & cmask;
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return delta;
}

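/*
 * Stop the hardware counter for this event, if it is not already
 * stopped, and fold the final count into the event.
 */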
void riscv_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (!(hwc->state & PERF_HES_STOPPED)) {
		if (rvpmu->ctr_stop) {
			rvpmu->ctr_stop(event, 0);
			hwc->state |= PERF_HES_STOPPED;
		}
		riscv_pmu_event_update(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

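/*
 * Compute and program the next sampling period: reload period_left,
 * clamp it to half the counter range to leave headroom for interrupt
 * latency, and seed prev_count so the counter overflows after 'left'
 * increments. The return value is nonzero when the previous period had
 * elapsed and a new one was started.
 */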
int riscv_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * Limit the maximum period to prevent the counter value
	 * from overtaking the one we are about to program. In
	 * effect we are reducing max_period to account for
	 * interrupt latency (and we are being very conservative).
	 */
	if (left > (max_period >> 1))
		left = (max_period >> 1);

	local64_set(&hwc->prev_count, (u64)-left);

	perf_event_update_userpage(event);

	return overflow;
}

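/*
 * (Re)program the sampling period and start the hardware counter from
 * the value stashed in prev_count.
 */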
void riscv_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
	u64 init_val;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	hwc->state = 0;
	riscv_pmu_event_set_period(event);
	init_val = local64_read(&hwc->prev_count) & max_period;
	rvpmu->ctr_start(event, init_val);
	perf_event_update_userpage(event);
}

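/*
 * Bind the event to a free hardware counter on this CPU and optionally
 * start it right away.
 */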
static int riscv_pmu_add(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	idx = rvpmu->ctr_get_idx(event);
	if (idx < 0)
		return idx;

	hwc->idx = idx;
	cpuc->events[idx] = event;
	cpuc->n_events++;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		riscv_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}

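/*
 * Unbind the event from its hardware counter and release the counter
 * back to the backend.
 */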
static void riscv_pmu_del(struct perf_event *event, int flags)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
	struct hw_perf_event *hwc = &event->hw;

	riscv_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[hwc->idx] = NULL;
	/* The firmware needs to reset the counter mapping */
	if (rvpmu->ctr_stop)
		rvpmu->ctr_stop(event, RISCV_PMU_STOP_FLAG_RESET);
	cpuc->n_events--;
	if (rvpmu->ctr_clear_idx)
		rvpmu->ctr_clear_idx(event);
	perf_event_update_userpage(event);
	hwc->idx = -1;
}

static void riscv_pmu_read(struct perf_event *event)
{
	riscv_pmu_event_update(event);
}

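/*
 * Set up the hw_perf_event from the user-supplied attributes: map the
 * event to a backend encoding and pick a default sample period for
 * counting (non-sampling) events.
 */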
static int riscv_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	int mapped_event;
	u64 event_config = 0;
	uint64_t cmask;

	/* driver does not support branch stack sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	hwc->flags = 0;
	mapped_event = rvpmu->event_map(event, &event_config);
	if (mapped_event < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapped_event;
	}

	/*
	 * idx is set to -1 because the index of a general event should not be
	 * decided until binding to some counter in pmu->add().
	 * config will contain the information about the counter CSR;
	 * idx will contain the counter index.
	 */
	hwc->config = event_config;
	hwc->idx = -1;
	hwc->event_base = mapped_event;

	if (rvpmu->event_init)
		rvpmu->event_init(event);

	if (!is_sampling_event(event)) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		cmask = riscv_pmu_ctr_get_width_mask(event);
		hwc->sample_period = cmask >> 1;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	return 0;
}

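/*
 * Report the counter index exposed in the perf mmap page (csr_index + 1);
 * 0 means the counter cannot be read directly from userspace.
 */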
static int riscv_pmu_event_idx(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
		return 0;

	if (rvpmu->csr_index)
		return rvpmu->csr_index(event) + 1;

	return 0;
}

static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_mapped) {
		rvpmu->event_mapped(event, mm);
		perf_event_update_userpage(event);
	}
}

static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);

	if (rvpmu->event_unmapped) {
		rvpmu->event_unmapped(event, mm);
		perf_event_update_userpage(event);
	}
}

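/*
 * Allocate a riscv_pmu along with its per-CPU event state and install
 * the common struct pmu callbacks.
 */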
struct riscv_pmu *riscv_pmu_alloc(void)
{
	struct riscv_pmu *pmu;
	int cpuid, i;
	struct cpu_hw_events *cpuc;

	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
	if (!pmu)
		goto out;

	pmu->hw_events = alloc_percpu_gfp(struct cpu_hw_events, GFP_KERNEL);
	if (!pmu->hw_events) {
		pr_info("failed to allocate per-cpu PMU data.\n");
		goto out_free_pmu;
	}

	for_each_possible_cpu(cpuid) {
		cpuc = per_cpu_ptr(pmu->hw_events, cpuid);
		cpuc->n_events = 0;
		for (i = 0; i < RISCV_MAX_COUNTERS; i++)
			cpuc->events[i] = NULL;
		cpuc->snapshot_addr = NULL;
	}
	pmu->pmu = (struct pmu) {
		.event_init	= riscv_pmu_event_init,
		.event_mapped	= riscv_pmu_event_mapped,
		.event_unmapped	= riscv_pmu_event_unmapped,
		.event_idx	= riscv_pmu_event_idx,
		.add		= riscv_pmu_add,
		.del		= riscv_pmu_del,
		.start		= riscv_pmu_start,
		.stop		= riscv_pmu_stop,
		.read		= riscv_pmu_read,