// SPDX-License-Identifier: GPL-2.0-only
/*
 * Driver for the L3 cache PMUs in Qualcomm Technologies chips.
 *
 * The driver supports a distributed cache architecture where the overall
 * cache for a socket is comprised of multiple slices each with its own PMU.
 * Access to each individual PMU is provided even though all CPUs share all
 * the slices. User space needs to aggregate the individual counts to provide
 * a global picture.
 *
 * See Documentation/admin-guide/perf/qcom_l3_pmu.rst for more details.
 *
 * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
 */
#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
/*
 * General constants
 */

/* Number of counters on each PMU */
#define L3_NUM_COUNTERS  8
/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
#define L3_EVTYPE_MASK   0xFF
/*
 * Bit position of the 'long counter' flag within perf_event_attr.config.
 * Reserve some space between the event type and this flag to allow expansion
 * in the event type field.
 */
#define L3_EVENT_LC_BIT  32
/*
 * Register offsets
 */

/*
 * Perfmon registers
 *
 * The per-counter registers are laid out 8 bytes apart; the counter index
 * is masked to 0..7 so an out-of-range index can never address past the
 * bank.
 */
#define L3_HML3_PM_CR       0x000
#define L3_HML3_PM_EVCNTR(__cntr) (0x420 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_CNTCTL(__cntr) (0x120 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_EVTYPE(__cntr) (0x220 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_FILTRA   0x300
#define L3_HML3_PM_FILTRB   0x308
#define L3_HML3_PM_FILTRC   0x310
#define L3_HML3_PM_FILTRAM  0x304
#define L3_HML3_PM_FILTRBM  0x30C
#define L3_HML3_PM_FILTRCM  0x314

/* Basic counter registers */
#define L3_M_BC_CR         0x500
#define L3_M_BC_SATROLL_CR 0x504
#define L3_M_BC_CNTENSET   0x508
#define L3_M_BC_CNTENCLR   0x50C
#define L3_M_BC_INTENSET   0x510
#define L3_M_BC_INTENCLR   0x514
#define L3_M_BC_GANG       0x718
#define L3_M_BC_OVSR       0x740
#define L3_M_BC_IRQCTL     0x96C
/*
 * Bit field definitions
 *
 * All per-counter bit helpers mask the counter index to 0..7, one bit per
 * hardware counter.
 */

/* L3_HML3_PM_CR */
#define PM_CR_RESET           (0)

/* L3_HML3_PM_XCNTCTL/L3_HML3_PM_CNTCTLx */
#define PMCNT_RESET           (0)

/* L3_HML3_PM_EVTYPEx */
#define EVSEL(__val)          ((__val) & L3_EVTYPE_MASK)

/* Reset value for all the filter registers */
#define PM_FLTR_RESET         (0)

/* L3_M_BC_CR */
#define BC_RESET              (1UL << 1)
#define BC_ENABLE             (1UL << 0)

/* L3_M_BC_SATROLL_CR */
#define BC_SATROLL_CR_RESET   (0)

/* L3_M_BC_CNTENSET */
#define PMCNTENSET(__cntr)    (1UL << ((__cntr) & 0x7))

/* L3_M_BC_CNTENCLR */
#define PMCNTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
#define BC_CNTENCLR_RESET     (0xFF)

/* L3_M_BC_INTENSET */
#define PMINTENSET(__cntr)    (1UL << ((__cntr) & 0x7))

/* L3_M_BC_INTENCLR */
#define PMINTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
#define BC_INTENCLR_RESET     (0xFF)

/* L3_M_BC_GANG */
#define GANG_EN(__cntr)       (1UL << ((__cntr) & 0x7))
#define BC_GANG_RESET         (0)

/* L3_M_BC_OVSR */
#define PMOVSRCLR(__cntr)     (1UL << ((__cntr) & 0x7))
#define PMOVSRCLR_RESET       (0xFF)

/* L3_M_BC_IRQCTL */
#define PMIRQONMSBEN(__cntr)  (1UL << ((__cntr) & 0x7))
#define BC_IRQCTL_RESET       (0x0)
/*
 * Events
 *
 * Event type codes programmed into the EVTYPE registers and exported to
 * user space through the "events" sysfs attributes.
 */

#define L3_EVENT_CYCLES		0x01
#define L3_EVENT_READ_HIT	0x20
#define L3_EVENT_READ_MISS	0x21
#define L3_EVENT_READ_HIT_D	0x22
#define L3_EVENT_READ_MISS_D	0x23
#define L3_EVENT_WRITE_HIT	0x24
#define L3_EVENT_WRITE_MISS	0x25
129 * Decoding of settings from perf_event_attr
131 * The config format for perf events is:
132 * - config: bits 0-7: event type
133 * bit 32: HW counter size requested, 0: 32 bits, 1: 64 bits
136 static inline u32
get_event_type(struct perf_event
*event
)
138 return (event
->attr
.config
) & L3_EVTYPE_MASK
;
141 static inline bool event_uses_long_counter(struct perf_event
*event
)
143 return !!(event
->attr
.config
& BIT_ULL(L3_EVENT_LC_BIT
));
/*
 * Number of physical counters @event occupies: two for a long (64-bit)
 * counter, one otherwise.
 */
static inline int event_num_counters(struct perf_event *event)
{
	return event_uses_long_counter(event) ? 2 : 1;
}
152 * Main PMU, inherits from the core perf PMU type
156 struct hlist_node node
;
158 struct perf_event
*events
[L3_NUM_COUNTERS
];
159 unsigned long used_mask
[BITS_TO_LONGS(L3_NUM_COUNTERS
)];
163 #define to_l3cache_pmu(p) (container_of(p, struct l3cache_pmu, pmu))
/*
 * Type used to group hardware counter operations
 *
 * Used to implement two types of hardware counters, standard (32bits) and
 * long (64bits). The hardware supports counter chaining which we use to
 * implement long counters. This support is exposed via the 'lc' flag field
 * in perf_event_attr.config.
 */
struct l3cache_event_ops {
	/* Called to start event monitoring */
	void (*start)(struct perf_event *event);
	/* Called to stop event monitoring */
	void (*stop)(struct perf_event *event, int flags);
	/* Called to update the perf_event */
	void (*update)(struct perf_event *event);
};
183 * Implementation of long counter operations
185 * 64bit counters are implemented by chaining two of the 32bit physical
186 * counters. The PMU only supports chaining of adjacent even/odd pairs
187 * and for simplicity the driver always configures the odd counter to
188 * count the overflows of the lower-numbered even counter. Note that since
189 * the resulting hardware counter is 64bits no IRQs are required to maintain
190 * the software counter which is also 64bits.
193 static void qcom_l3_cache__64bit_counter_start(struct perf_event
*event
)
195 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
196 int idx
= event
->hw
.idx
;
197 u32 evsel
= get_event_type(event
);
200 /* Set the odd counter to count the overflows of the even counter */
201 gang
= readl_relaxed(l3pmu
->regs
+ L3_M_BC_GANG
);
202 gang
|= GANG_EN(idx
+ 1);
203 writel_relaxed(gang
, l3pmu
->regs
+ L3_M_BC_GANG
);
205 /* Initialize the hardware counters and reset prev_count*/
206 local64_set(&event
->hw
.prev_count
, 0);
207 writel_relaxed(0, l3pmu
->regs
+ L3_HML3_PM_EVCNTR(idx
+ 1));
208 writel_relaxed(0, l3pmu
->regs
+ L3_HML3_PM_EVCNTR(idx
));
211 * Set the event types, the upper half must use zero and the lower
212 * half the actual event type
214 writel_relaxed(EVSEL(0), l3pmu
->regs
+ L3_HML3_PM_EVTYPE(idx
+ 1));
215 writel_relaxed(EVSEL(evsel
), l3pmu
->regs
+ L3_HML3_PM_EVTYPE(idx
));
217 /* Finally, enable the counters */
218 writel_relaxed(PMCNT_RESET
, l3pmu
->regs
+ L3_HML3_PM_CNTCTL(idx
+ 1));
219 writel_relaxed(PMCNTENSET(idx
+ 1), l3pmu
->regs
+ L3_M_BC_CNTENSET
);
220 writel_relaxed(PMCNT_RESET
, l3pmu
->regs
+ L3_HML3_PM_CNTCTL(idx
));
221 writel_relaxed(PMCNTENSET(idx
), l3pmu
->regs
+ L3_M_BC_CNTENSET
);
224 static void qcom_l3_cache__64bit_counter_stop(struct perf_event
*event
,
227 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
228 int idx
= event
->hw
.idx
;
229 u32 gang
= readl_relaxed(l3pmu
->regs
+ L3_M_BC_GANG
);
231 /* Disable the counters */
232 writel_relaxed(PMCNTENCLR(idx
), l3pmu
->regs
+ L3_M_BC_CNTENCLR
);
233 writel_relaxed(PMCNTENCLR(idx
+ 1), l3pmu
->regs
+ L3_M_BC_CNTENCLR
);
235 /* Disable chaining */
236 writel_relaxed(gang
& ~GANG_EN(idx
+ 1), l3pmu
->regs
+ L3_M_BC_GANG
);
239 static void qcom_l3_cache__64bit_counter_update(struct perf_event
*event
)
241 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
242 int idx
= event
->hw
.idx
;
247 prev
= local64_read(&event
->hw
.prev_count
);
249 hi
= readl_relaxed(l3pmu
->regs
+ L3_HML3_PM_EVCNTR(idx
+ 1));
250 lo
= readl_relaxed(l3pmu
->regs
+ L3_HML3_PM_EVCNTR(idx
));
251 } while (hi
!= readl_relaxed(l3pmu
->regs
+ L3_HML3_PM_EVCNTR(idx
+ 1)));
252 new = ((u64
)hi
<< 32) | lo
;
253 } while (local64_cmpxchg(&event
->hw
.prev_count
, prev
, new) != prev
);
255 local64_add(new - prev
, &event
->count
);
258 static const struct l3cache_event_ops event_ops_long
= {
259 .start
= qcom_l3_cache__64bit_counter_start
,
260 .stop
= qcom_l3_cache__64bit_counter_stop
,
261 .update
= qcom_l3_cache__64bit_counter_update
,
265 * Implementation of standard counter operations
267 * 32bit counters use a single physical counter and a hardware feature that
268 * asserts the overflow IRQ on the toggling of the most significant bit in
269 * the counter. This feature allows the counters to be left free-running
270 * without needing the usual reprogramming required to properly handle races
271 * during concurrent calls to update.
274 static void qcom_l3_cache__32bit_counter_start(struct perf_event
*event
)
276 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
277 int idx
= event
->hw
.idx
;
278 u32 evsel
= get_event_type(event
);
279 u32 irqctl
= readl_relaxed(l3pmu
->regs
+ L3_M_BC_IRQCTL
);
281 /* Set the counter to assert the overflow IRQ on MSB toggling */
282 writel_relaxed(irqctl
| PMIRQONMSBEN(idx
), l3pmu
->regs
+ L3_M_BC_IRQCTL
);
284 /* Initialize the hardware counter and reset prev_count*/
285 local64_set(&event
->hw
.prev_count
, 0);
286 writel_relaxed(0, l3pmu
->regs
+ L3_HML3_PM_EVCNTR(idx
));
288 /* Set the event type */
289 writel_relaxed(EVSEL(evsel
), l3pmu
->regs
+ L3_HML3_PM_EVTYPE(idx
));
291 /* Enable interrupt generation by this counter */
292 writel_relaxed(PMINTENSET(idx
), l3pmu
->regs
+ L3_M_BC_INTENSET
);
294 /* Finally, enable the counter */
295 writel_relaxed(PMCNT_RESET
, l3pmu
->regs
+ L3_HML3_PM_CNTCTL(idx
));
296 writel_relaxed(PMCNTENSET(idx
), l3pmu
->regs
+ L3_M_BC_CNTENSET
);
299 static void qcom_l3_cache__32bit_counter_stop(struct perf_event
*event
,
302 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
303 int idx
= event
->hw
.idx
;
304 u32 irqctl
= readl_relaxed(l3pmu
->regs
+ L3_M_BC_IRQCTL
);
306 /* Disable the counter */
307 writel_relaxed(PMCNTENCLR(idx
), l3pmu
->regs
+ L3_M_BC_CNTENCLR
);
309 /* Disable interrupt generation by this counter */
310 writel_relaxed(PMINTENCLR(idx
), l3pmu
->regs
+ L3_M_BC_INTENCLR
);
312 /* Set the counter to not assert the overflow IRQ on MSB toggling */
313 writel_relaxed(irqctl
& ~PMIRQONMSBEN(idx
), l3pmu
->regs
+ L3_M_BC_IRQCTL
);
316 static void qcom_l3_cache__32bit_counter_update(struct perf_event
*event
)
318 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
319 int idx
= event
->hw
.idx
;
323 prev
= local64_read(&event
->hw
.prev_count
);
324 new = readl_relaxed(l3pmu
->regs
+ L3_HML3_PM_EVCNTR(idx
));
325 } while (local64_cmpxchg(&event
->hw
.prev_count
, prev
, new) != prev
);
327 local64_add(new - prev
, &event
->count
);
330 static const struct l3cache_event_ops event_ops_std
= {
331 .start
= qcom_l3_cache__32bit_counter_start
,
332 .stop
= qcom_l3_cache__32bit_counter_stop
,
333 .update
= qcom_l3_cache__32bit_counter_update
,
336 /* Retrieve the appropriate operations for the given event */
338 const struct l3cache_event_ops
*l3cache_event_get_ops(struct perf_event
*event
)
340 if (event_uses_long_counter(event
))
341 return &event_ops_long
;
343 return &event_ops_std
;
347 * Top level PMU functions.
350 static inline void qcom_l3_cache__init(struct l3cache_pmu
*l3pmu
)
354 writel_relaxed(BC_RESET
, l3pmu
->regs
+ L3_M_BC_CR
);
357 * Use writel for the first programming command to ensure the basic
358 * counter unit is stopped before proceeding
360 writel(BC_SATROLL_CR_RESET
, l3pmu
->regs
+ L3_M_BC_SATROLL_CR
);
362 writel_relaxed(BC_CNTENCLR_RESET
, l3pmu
->regs
+ L3_M_BC_CNTENCLR
);
363 writel_relaxed(BC_INTENCLR_RESET
, l3pmu
->regs
+ L3_M_BC_INTENCLR
);
364 writel_relaxed(PMOVSRCLR_RESET
, l3pmu
->regs
+ L3_M_BC_OVSR
);
365 writel_relaxed(BC_GANG_RESET
, l3pmu
->regs
+ L3_M_BC_GANG
);
366 writel_relaxed(BC_IRQCTL_RESET
, l3pmu
->regs
+ L3_M_BC_IRQCTL
);
367 writel_relaxed(PM_CR_RESET
, l3pmu
->regs
+ L3_HML3_PM_CR
);
369 for (i
= 0; i
< L3_NUM_COUNTERS
; ++i
) {
370 writel_relaxed(PMCNT_RESET
, l3pmu
->regs
+ L3_HML3_PM_CNTCTL(i
));
371 writel_relaxed(EVSEL(0), l3pmu
->regs
+ L3_HML3_PM_EVTYPE(i
));
374 writel_relaxed(PM_FLTR_RESET
, l3pmu
->regs
+ L3_HML3_PM_FILTRA
);
375 writel_relaxed(PM_FLTR_RESET
, l3pmu
->regs
+ L3_HML3_PM_FILTRAM
);
376 writel_relaxed(PM_FLTR_RESET
, l3pmu
->regs
+ L3_HML3_PM_FILTRB
);
377 writel_relaxed(PM_FLTR_RESET
, l3pmu
->regs
+ L3_HML3_PM_FILTRBM
);
378 writel_relaxed(PM_FLTR_RESET
, l3pmu
->regs
+ L3_HML3_PM_FILTRC
);
379 writel_relaxed(PM_FLTR_RESET
, l3pmu
->regs
+ L3_HML3_PM_FILTRCM
);
382 * Use writel here to ensure all programming commands are done
385 writel(BC_ENABLE
, l3pmu
->regs
+ L3_M_BC_CR
);
388 static irqreturn_t
qcom_l3_cache__handle_irq(int irq_num
, void *data
)
390 struct l3cache_pmu
*l3pmu
= data
;
391 /* Read the overflow status register */
392 long status
= readl_relaxed(l3pmu
->regs
+ L3_M_BC_OVSR
);
398 /* Clear the bits we read on the overflow status register */
399 writel_relaxed(status
, l3pmu
->regs
+ L3_M_BC_OVSR
);
401 for_each_set_bit(idx
, &status
, L3_NUM_COUNTERS
) {
402 struct perf_event
*event
;
403 const struct l3cache_event_ops
*ops
;
405 event
= l3pmu
->events
[idx
];
410 * Since the IRQ is not enabled for events using long counters
411 * we should never see one of those here, however, be consistent
412 * and use the ops indirections like in the other operations.
415 ops
= l3cache_event_get_ops(event
);
423 * Implementation of abstract pmu functionality required by
424 * the core perf events code.
427 static void qcom_l3_cache__pmu_enable(struct pmu
*pmu
)
429 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(pmu
);
431 /* Ensure the other programming commands are observed before enabling */
434 writel_relaxed(BC_ENABLE
, l3pmu
->regs
+ L3_M_BC_CR
);
437 static void qcom_l3_cache__pmu_disable(struct pmu
*pmu
)
439 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(pmu
);
441 writel_relaxed(0, l3pmu
->regs
+ L3_M_BC_CR
);
443 /* Ensure the basic counter unit is stopped before proceeding */
448 * We must NOT create groups containing events from multiple hardware PMUs,
449 * although mixing different software and hardware PMUs is allowed.
451 static bool qcom_l3_cache__validate_event_group(struct perf_event
*event
)
453 struct perf_event
*leader
= event
->group_leader
;
454 struct perf_event
*sibling
;
457 if (leader
->pmu
!= event
->pmu
&& !is_software_event(leader
))
460 counters
= event_num_counters(event
);
461 counters
+= event_num_counters(leader
);
463 for_each_sibling_event(sibling
, leader
) {
464 if (is_software_event(sibling
))
466 if (sibling
->pmu
!= event
->pmu
)
468 counters
+= event_num_counters(sibling
);
472 * If the group requires more counters than the HW has, it
473 * cannot ever be scheduled.
475 return counters
<= L3_NUM_COUNTERS
;
478 static int qcom_l3_cache__event_init(struct perf_event
*event
)
480 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
481 struct hw_perf_event
*hwc
= &event
->hw
;
484 * Is the event for this PMU?
486 if (event
->attr
.type
!= event
->pmu
->type
)
490 * Sampling not supported since these events are not core-attributable.
492 if (hwc
->sample_period
)
496 * Task mode not available, we run the counters as socket counters,
497 * not attributable to any CPU and therefore cannot attribute per-task.
502 /* Validate the group */
503 if (!qcom_l3_cache__validate_event_group(event
))
509 * Many perf core operations (eg. events rotation) operate on a
510 * single CPU context. This is obvious for CPU PMUs, where one
511 * expects the same sets of events being observed on all CPUs,
512 * but can lead to issues for off-core PMUs, like this one, where
513 * each event could be theoretically assigned to a different CPU.
514 * To mitigate this, we enforce CPU assignment to one designated
515 * processor (the one described in the "cpumask" attribute exported
516 * by the PMU device). perf user space tools honor this and avoid
517 * opening more than one copy of the events.
519 event
->cpu
= cpumask_first(&l3pmu
->cpumask
);
524 static void qcom_l3_cache__event_start(struct perf_event
*event
, int flags
)
526 struct hw_perf_event
*hwc
= &event
->hw
;
527 const struct l3cache_event_ops
*ops
= l3cache_event_get_ops(event
);
533 static void qcom_l3_cache__event_stop(struct perf_event
*event
, int flags
)
535 struct hw_perf_event
*hwc
= &event
->hw
;
536 const struct l3cache_event_ops
*ops
= l3cache_event_get_ops(event
);
538 if (hwc
->state
& PERF_HES_STOPPED
)
541 ops
->stop(event
, flags
);
542 if (flags
& PERF_EF_UPDATE
)
544 hwc
->state
|= PERF_HES_STOPPED
| PERF_HES_UPTODATE
;
547 static int qcom_l3_cache__event_add(struct perf_event
*event
, int flags
)
549 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
550 struct hw_perf_event
*hwc
= &event
->hw
;
551 int order
= event_uses_long_counter(event
) ? 1 : 0;
555 * Try to allocate a counter.
557 idx
= bitmap_find_free_region(l3pmu
->used_mask
, L3_NUM_COUNTERS
, order
);
559 /* The counters are all in use. */
563 hwc
->state
= PERF_HES_STOPPED
| PERF_HES_UPTODATE
;
564 l3pmu
->events
[idx
] = event
;
566 if (flags
& PERF_EF_START
)
567 qcom_l3_cache__event_start(event
, 0);
569 /* Propagate changes to the userspace mapping. */
570 perf_event_update_userpage(event
);
575 static void qcom_l3_cache__event_del(struct perf_event
*event
, int flags
)
577 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(event
->pmu
);
578 struct hw_perf_event
*hwc
= &event
->hw
;
579 int order
= event_uses_long_counter(event
) ? 1 : 0;
581 /* Stop and clean up */
582 qcom_l3_cache__event_stop(event
, flags
| PERF_EF_UPDATE
);
583 l3pmu
->events
[hwc
->idx
] = NULL
;
584 bitmap_release_region(l3pmu
->used_mask
, hwc
->idx
, order
);
586 /* Propagate changes to the userspace mapping. */
587 perf_event_update_userpage(event
);
590 static void qcom_l3_cache__event_read(struct perf_event
*event
)
592 const struct l3cache_event_ops
*ops
= l3cache_event_get_ops(event
);
/*
 * Add sysfs attributes
 *
 * We export:
 * - formats, used by perf user space and other tools to configure events
 * - events, used by perf user space and other tools to create events
 *   symbolically, e.g.:
 *     perf stat -a -e l3cache_0_0/event=read-miss/ ls
 *     perf stat -a -e l3cache_0_0/event=0x21/ ls
 * - cpumask, used by perf user space and other tools to know on which CPUs
 *   to open the events
 */

/* formats */

/*
 * Build an anonymous dev_ext_attribute that shows the string @_config and
 * evaluate to a pointer to its embedded struct attribute.
 * NOTE(review): the closing line of this macro was not present in the
 * extracted text; restored so the expression yields a struct attribute *
 * as required by the formats array - confirm.
 */
#define L3CACHE_PMU_FORMAT_ATTR(_name, _config)				      \
	(&((struct dev_ext_attribute[]) {				      \
		{ .attr = __ATTR(_name, 0444, device_show_string, NULL),      \
		  .var = (void *) _config, }				      \
	})[0].attr.attr)
618 static struct attribute
*qcom_l3_cache_pmu_formats
[] = {
619 L3CACHE_PMU_FORMAT_ATTR(event
, "config:0-7"),
620 L3CACHE_PMU_FORMAT_ATTR(lc
, "config:" __stringify(L3_EVENT_LC_BIT
)),
624 static const struct attribute_group qcom_l3_cache_pmu_format_group
= {
626 .attrs
= qcom_l3_cache_pmu_formats
,
631 static ssize_t
l3cache_pmu_event_show(struct device
*dev
,
632 struct device_attribute
*attr
, char *page
)
634 struct perf_pmu_events_attr
*pmu_attr
;
636 pmu_attr
= container_of(attr
, struct perf_pmu_events_attr
, attr
);
637 return sysfs_emit(page
, "event=0x%02llx\n", pmu_attr
->id
);
/* Declare one named event attribute shown through l3cache_pmu_event_show() */
#define L3CACHE_EVENT_ATTR(_name, _id)					     \
	PMU_EVENT_ATTR_ID(_name, l3cache_pmu_event_show, _id)
643 static struct attribute
*qcom_l3_cache_pmu_events
[] = {
644 L3CACHE_EVENT_ATTR(cycles
, L3_EVENT_CYCLES
),
645 L3CACHE_EVENT_ATTR(read
-hit
, L3_EVENT_READ_HIT
),
646 L3CACHE_EVENT_ATTR(read
-miss
, L3_EVENT_READ_MISS
),
647 L3CACHE_EVENT_ATTR(read
-hit
-d
-side
, L3_EVENT_READ_HIT_D
),
648 L3CACHE_EVENT_ATTR(read
-miss
-d
-side
, L3_EVENT_READ_MISS_D
),
649 L3CACHE_EVENT_ATTR(write
-hit
, L3_EVENT_WRITE_HIT
),
650 L3CACHE_EVENT_ATTR(write
-miss
, L3_EVENT_WRITE_MISS
),
654 static const struct attribute_group qcom_l3_cache_pmu_events_group
= {
656 .attrs
= qcom_l3_cache_pmu_events
,
661 static ssize_t
cpumask_show(struct device
*dev
,
662 struct device_attribute
*attr
, char *buf
)
664 struct l3cache_pmu
*l3pmu
= to_l3cache_pmu(dev_get_drvdata(dev
));
666 return cpumap_print_to_pagebuf(true, buf
, &l3pmu
->cpumask
);
/* Read-only "cpumask" device attribute backed by cpumask_show() */
static DEVICE_ATTR_RO(cpumask);
671 static struct attribute
*qcom_l3_cache_pmu_cpumask_attrs
[] = {
672 &dev_attr_cpumask
.attr
,
676 static const struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group
= {
677 .attrs
= qcom_l3_cache_pmu_cpumask_attrs
,
681 * Per PMU device attribute groups
683 static const struct attribute_group
*qcom_l3_cache_pmu_attr_grps
[] = {
684 &qcom_l3_cache_pmu_format_group
,
685 &qcom_l3_cache_pmu_events_group
,
686 &qcom_l3_cache_pmu_cpumask_attr_group
,
691 * Probing functions and data.
694 static int qcom_l3_cache_pmu_online_cpu(unsigned int cpu
, struct hlist_node
*node
)
696 struct l3cache_pmu
*l3pmu
= hlist_entry_safe(node
, struct l3cache_pmu
, node
);
698 /* If there is not a CPU/PMU association pick this CPU */
699 if (cpumask_empty(&l3pmu
->cpumask
))
700 cpumask_set_cpu(cpu
, &l3pmu
->cpumask
);
705 static int qcom_l3_cache_pmu_offline_cpu(unsigned int cpu
, struct hlist_node
*node
)
707 struct l3cache_pmu
*l3pmu
= hlist_entry_safe(node
, struct l3cache_pmu
, node
);
710 if (!cpumask_test_and_clear_cpu(cpu
, &l3pmu
->cpumask
))
712 target
= cpumask_any_but(cpu_online_mask
, cpu
);
713 if (target
>= nr_cpu_ids
)
715 perf_pmu_migrate_context(&l3pmu
->pmu
, cpu
, target
);
716 cpumask_set_cpu(target
, &l3pmu
->cpumask
);
720 static int qcom_l3_cache_pmu_probe(struct platform_device
*pdev
)
722 struct l3cache_pmu
*l3pmu
;
723 struct acpi_device
*acpi_dev
;
724 struct resource
*memrc
;
728 /* Initialize the PMU data structures */
730 acpi_dev
= ACPI_COMPANION(&pdev
->dev
);
734 l3pmu
= devm_kzalloc(&pdev
->dev
, sizeof(*l3pmu
), GFP_KERNEL
);
735 name
= devm_kasprintf(&pdev
->dev
, GFP_KERNEL
, "l3cache_%s_%s",
736 acpi_device_uid(acpi_dev_parent(acpi_dev
)),
737 acpi_device_uid(acpi_dev
));
741 l3pmu
->pmu
= (struct pmu
) {
742 .parent
= &pdev
->dev
,
743 .task_ctx_nr
= perf_invalid_context
,
745 .pmu_enable
= qcom_l3_cache__pmu_enable
,
746 .pmu_disable
= qcom_l3_cache__pmu_disable
,
747 .event_init
= qcom_l3_cache__event_init
,
748 .add
= qcom_l3_cache__event_add
,
749 .del
= qcom_l3_cache__event_del
,
750 .start
= qcom_l3_cache__event_start
,
751 .stop
= qcom_l3_cache__event_stop
,
752 .read
= qcom_l3_cache__event_read
,
754 .attr_groups
= qcom_l3_cache_pmu_attr_grps
,
755 .capabilities
= PERF_PMU_CAP_NO_EXCLUDE
,
758 l3pmu
->regs
= devm_platform_get_and_ioremap_resource(pdev
, 0, &memrc
);
759 if (IS_ERR(l3pmu
->regs
))
760 return PTR_ERR(l3pmu
->regs
);
762 qcom_l3_cache__init(l3pmu
);
764 ret
= platform_get_irq(pdev
, 0);
768 ret
= devm_request_irq(&pdev
->dev
, ret
, qcom_l3_cache__handle_irq
, 0,
771 dev_err(&pdev
->dev
, "Request for IRQ failed for slice @%pa\n",
776 /* Add this instance to the list used by the offline callback */
777 ret
= cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE
, &l3pmu
->node
);
779 dev_err(&pdev
->dev
, "Error %d registering hotplug", ret
);
783 ret
= perf_pmu_register(&l3pmu
->pmu
, name
, -1);
785 dev_err(&pdev
->dev
, "Failed to register L3 cache PMU (%d)\n", ret
);
789 dev_info(&pdev
->dev
, "Registered %s, type: %d\n", name
, l3pmu
->pmu
.type
);
794 static const struct acpi_device_id qcom_l3_cache_pmu_acpi_match
[] = {
798 MODULE_DEVICE_TABLE(acpi
, qcom_l3_cache_pmu_acpi_match
);
800 static struct platform_driver qcom_l3_cache_pmu_driver
= {
802 .name
= "qcom-l3cache-pmu",
803 .acpi_match_table
= ACPI_PTR(qcom_l3_cache_pmu_acpi_match
),
804 .suppress_bind_attrs
= true,
806 .probe
= qcom_l3_cache_pmu_probe
,
809 static int __init
register_qcom_l3_cache_pmu_driver(void)
813 /* Install a hook to update the reader CPU in case it goes offline */
814 ret
= cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE
,
815 "perf/qcom/l3cache:online",
816 qcom_l3_cache_pmu_online_cpu
,
817 qcom_l3_cache_pmu_offline_cpu
);
821 return platform_driver_register(&qcom_l3_cache_pmu_driver
);
823 device_initcall(register_qcom_l3_cache_pmu_driver
);