// SPDX-License-Identifier: GPL-2.0
/*
 * ARM CoreSight Architecture PMU driver.
 *
 * This driver adds support for uncore PMUs based on the ARM CoreSight
 * Performance Monitoring Unit Architecture. The PMU is accessible via MMIO
 * registers and, like other uncore PMUs, it does not support process-specific
 * events and cannot be used in sampling mode.
 *
 * This code is based on other uncore PMUs like the ARM DSU PMU. It provides a
 * generic implementation to operate the PMU according to the CoreSight PMU
 * architecture and the ACPI ARM PMU table (APMT) documents below:
 *  - ARM CoreSight PMU architecture document number: ARM IHI 0091 A.a-00bet0.
 *  - APMT document number: ARM DEN0117.
 *
 * The user should refer to the vendor technical documentation for details
 * about the supported events.
 *
 * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */
#include <linux/acpi.h>
#include <linux/cacheinfo.h>
#include <linux/ctype.h>
#include <linux/interrupt.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

#include "arm_cspmu.h"
#define PMUNAME "arm_cspmu"
#define DRVNAME "arm-cs-arch-pmu"

#define ARM_CSPMU_CPUMASK_ATTR(_name, _config)			\
	ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_cpumask_show,	\
			   (unsigned long)_config)
/*
 * CoreSight PMU Arch register offsets.
 */
#define PMEVCNTR_LO		0x0
#define PMEVCNTR_HI		0x4
#define PMEVTYPER		0x400
#define PMCCFILTR		0x47C
#define PMEVFILTR		0xA00
#define PMCNTENSET		0xC00
#define PMCNTENCLR		0xC20
#define PMINTENSET		0xC40
#define PMINTENCLR		0xC60
#define PMOVSCLR		0xC80
#define PMOVSSET		0xCC0
#define PMCFGR			0xE00
#define PMCR			0xE04
#define PMIIDR			0xE08
/* PMCFGR register field */
#define PMCFGR_NCG		GENMASK(31, 28)
#define PMCFGR_HDBG		BIT(24)
#define PMCFGR_TRO		BIT(23)
#define PMCFGR_SS		BIT(22)
#define PMCFGR_FZO		BIT(21)
#define PMCFGR_MSI		BIT(20)
#define PMCFGR_UEN		BIT(19)
#define PMCFGR_NA		BIT(17)
#define PMCFGR_EX		BIT(16)
#define PMCFGR_CCD		BIT(15)
#define PMCFGR_CC		BIT(14)
#define PMCFGR_SIZE		GENMASK(13, 8)
#define PMCFGR_N		GENMASK(7, 0)
/* PMCR register field */
#define PMCR_TRO		BIT(11)
#define PMCR_HDBG		BIT(10)
#define PMCR_FZO		BIT(9)
#define PMCR_NA			BIT(8)
#define PMCR_DP			BIT(5)
#define PMCR_X			BIT(4)
#define PMCR_D			BIT(3)
#define PMCR_C			BIT(2)
#define PMCR_P			BIT(1)
#define PMCR_E			BIT(0)
/* Each SET/CLR register supports up to 32 counters. */
#define ARM_CSPMU_SET_CLR_COUNTER_SHIFT		5
#define ARM_CSPMU_SET_CLR_COUNTER_NUM		\
	(1 << ARM_CSPMU_SET_CLR_COUNTER_SHIFT)

/* Convert counter idx into SET/CLR register number. */
#define COUNTER_TO_SET_CLR_ID(idx)		\
	(idx >> ARM_CSPMU_SET_CLR_COUNTER_SHIFT)

/* Convert counter idx into SET/CLR register bit. */
#define COUNTER_TO_SET_CLR_BIT(idx)		\
	(idx & (ARM_CSPMU_SET_CLR_COUNTER_NUM - 1))
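/*
 * Worked example (illustrative, not from the original source): for counter
 * idx = 37, COUNTER_TO_SET_CLR_ID(37) == 1 and COUNTER_TO_SET_CLR_BIT(37) == 5,
 * so the driver touches BIT(5) of the second SET/CLR register, i.e. the
 * register at (SET/CLR base offset + 4).
 */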
#define ARM_CSPMU_ACTIVE_CPU_MASK		0x0
#define ARM_CSPMU_ASSOCIATED_CPU_MASK		0x1

/*
 * Maximum poll count for reading counter value using high-low-high sequence.
 */
#define HILOHI_MAX_POLL	1000
static unsigned long arm_cspmu_cpuhp_state;

static DEFINE_MUTEX(arm_cspmu_lock);

static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
				    struct hw_perf_event *hwc, u32 filter);
static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
{
	struct acpi_apmt_node **ptr = dev_get_platdata(dev);

	return ptr ? *ptr : NULL;
}
/*
 * In CoreSight PMU architecture, all of the MMIO registers are 32-bit except
 * the counter register. The counter register can be implemented as a 32-bit
 * or 64-bit register depending on the value of the PMCFGR.SIZE field. For
 * 64-bit access, single-copy 64-bit atomic support is implementation defined.
 * The APMT node flag is used to identify if the PMU supports 64-bit
 * single-copy atomics. If 64-bit single-copy atomics are not supported, the
 * driver treats the register as a pair of 32-bit registers.
 */
/*
 * Read a 64-bit register as a pair of 32-bit registers using the hi-lo-hi
 * sequence.
 */
static u64 read_reg64_hilohi(const void __iomem *addr, u32 max_poll_count)
{
	u32 val_lo, val_hi;
	u64 val;

	/* Use high-low-high sequence to avoid tearing */
	do {
		if (max_poll_count-- == 0) {
			pr_err("ARM CSPMU: timeout hi-low-high sequence\n");
			return 0;
		}

		val_hi = readl(addr + 4);
		val_lo = readl(addr);
	} while (val_hi != readl(addr + 4));

	val = (((u64)val_hi << 32) | val_lo);

	return val;
}
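/*
 * Note on the hi-lo-hi sequence above: if the high word read after the low
 * word differs from the one read before it, the counter carried into the
 * upper 32 bits between the two accesses and the pair is re-read, so the
 * returned value is always a consistent 64-bit snapshot.
 */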
/* Check if cycle counter is supported. */
static inline bool supports_cycle_counter(const struct arm_cspmu *cspmu)
{
	return (cspmu->pmcfgr & PMCFGR_CC);
}

/* Get counter size, which is (PMCFGR_SIZE + 1). */
static inline u32 counter_size(const struct arm_cspmu *cspmu)
{
	return FIELD_GET(PMCFGR_SIZE, cspmu->pmcfgr) + 1;
}

/* Get counter mask. */
static inline u64 counter_mask(const struct arm_cspmu *cspmu)
{
	return GENMASK_ULL(counter_size(cspmu) - 1, 0);
}

/* Check if counter is implemented as 64-bit register. */
static inline bool use_64b_counter_reg(const struct arm_cspmu *cspmu)
{
	return (counter_size(cspmu) > 32);
}
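/*
 * Example (illustrative field values): a PMCFGR.SIZE of 0x1f encodes 32-bit
 * counters, so counter_mask() is GENMASK_ULL(31, 0) and use_64b_counter_reg()
 * is false; a value of 0x3f encodes 64-bit counters and selects the 64-bit
 * access paths below.
 */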
ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, typeof(*pmu_attr), attr);
	return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id);
}
EXPORT_SYMBOL_GPL(arm_cspmu_sysfs_event_show);

/* Default event list. */
static struct attribute *arm_cspmu_event_attrs[] = {
	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
	NULL,
};
static struct attribute **
arm_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
{
	struct attribute **attrs;

	attrs = devm_kmemdup(cspmu->dev, arm_cspmu_event_attrs,
			     sizeof(arm_cspmu_event_attrs), GFP_KERNEL);

	return attrs;
}
static umode_t
arm_cspmu_event_attr_is_visible(struct kobject *kobj,
				struct attribute *attr, int unused)
{
	struct device *dev = kobj_to_dev(kobj);
	struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev));
	struct perf_pmu_events_attr *eattr;

	eattr = container_of(attr, typeof(*eattr), attr.attr);

	/* Hide cycle event if not supported */
	if (!supports_cycle_counter(cspmu) &&
	    eattr->id == ARM_CSPMU_EVT_CYCLES_DEFAULT)
		return 0;

	return attr->mode;
}
static struct attribute *arm_cspmu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	ARM_CSPMU_FORMAT_FILTER_ATTR,
	NULL,
};

static struct attribute **
arm_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
{
	struct attribute **attrs;

	attrs = devm_kmemdup(cspmu->dev, arm_cspmu_format_attrs,
			     sizeof(arm_cspmu_format_attrs), GFP_KERNEL);

	return attrs;
}
static u32 arm_cspmu_event_type(const struct perf_event *event)
{
	return event->attr.config & ARM_CSPMU_EVENT_MASK;
}

static bool arm_cspmu_is_cycle_counter_event(const struct perf_event *event)
{
	return (event->attr.config == ARM_CSPMU_EVT_CYCLES_DEFAULT);
}

static u32 arm_cspmu_event_filter(const struct perf_event *event)
{
	return event->attr.config1 & ARM_CSPMU_FILTER_MASK;
}
static ssize_t arm_cspmu_identifier_show(struct device *dev,
					 struct device_attribute *attr,
					 char *page)
{
	struct arm_cspmu *cspmu = to_arm_cspmu(dev_get_drvdata(dev));

	return sysfs_emit(page, "%s\n", cspmu->identifier);
}

static struct device_attribute arm_cspmu_identifier_attr =
	__ATTR(identifier, 0444, arm_cspmu_identifier_show, NULL);

static struct attribute *arm_cspmu_identifier_attrs[] = {
	&arm_cspmu_identifier_attr.attr,
	NULL,
};

static struct attribute_group arm_cspmu_identifier_attr_group = {
	.attrs = arm_cspmu_identifier_attrs,
};
static const char *arm_cspmu_get_identifier(const struct arm_cspmu *cspmu)
{
	const char *identifier =
		devm_kasprintf(cspmu->dev, GFP_KERNEL, "%x",
			       cspmu->impl.pmiidr);
	return identifier;
}
static const char *arm_cspmu_type_str[ACPI_APMT_NODE_TYPE_COUNT] = {
	[ACPI_APMT_NODE_TYPE_MC] = "mc",
	[ACPI_APMT_NODE_TYPE_SMMU] = "smmu",
	[ACPI_APMT_NODE_TYPE_PCIE_ROOT] = "pcie",
	[ACPI_APMT_NODE_TYPE_ACPI] = "acpi",
	[ACPI_APMT_NODE_TYPE_CACHE] = "cache",
};
static const char *arm_cspmu_get_name(const struct arm_cspmu *cspmu)
{
	struct device *dev;
	struct acpi_apmt_node *apmt_node;
	u8 pmu_type;
	char *name;
	char acpi_hid_string[ACPI_ID_LEN] = { 0 };
	static atomic_t pmu_idx[ACPI_APMT_NODE_TYPE_COUNT] = { 0 };

	dev = cspmu->dev;
	apmt_node = arm_cspmu_apmt_node(dev);
	if (!apmt_node)
		return devm_kasprintf(dev, GFP_KERNEL, PMUNAME "_%u",
				      atomic_fetch_inc(&pmu_idx[0]));

	pmu_type = apmt_node->type;

	if (pmu_type >= ACPI_APMT_NODE_TYPE_COUNT) {
		dev_err(dev, "unsupported PMU type-%u\n", pmu_type);
		return NULL;
	}

	if (pmu_type == ACPI_APMT_NODE_TYPE_ACPI) {
		memcpy(acpi_hid_string,
		       &apmt_node->inst_primary,
		       sizeof(apmt_node->inst_primary));
		name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%s_%u", PMUNAME,
				      arm_cspmu_type_str[pmu_type],
				      acpi_hid_string,
				      apmt_node->inst_secondary);
	} else {
		name = devm_kasprintf(dev, GFP_KERNEL, "%s_%s_%d", PMUNAME,
				      arm_cspmu_type_str[pmu_type],
				      atomic_fetch_inc(&pmu_idx[pmu_type]));
	}

	return name;
}
static ssize_t arm_cspmu_cpumask_show(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	struct pmu *pmu = dev_get_drvdata(dev);
	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);
	struct dev_ext_attribute *eattr =
		container_of(attr, struct dev_ext_attribute, attr);
	unsigned long mask_id = (unsigned long)eattr->var;
	const cpumask_t *cpumask;

	switch (mask_id) {
	case ARM_CSPMU_ACTIVE_CPU_MASK:
		cpumask = &cspmu->active_cpu;
		break;
	case ARM_CSPMU_ASSOCIATED_CPU_MASK:
		cpumask = &cspmu->associated_cpus;
		break;
	default:
		return 0;
	}
	return cpumap_print_to_pagebuf(true, buf, cpumask);
}
static struct attribute *arm_cspmu_cpumask_attrs[] = {
	ARM_CSPMU_CPUMASK_ATTR(cpumask, ARM_CSPMU_ACTIVE_CPU_MASK),
	ARM_CSPMU_CPUMASK_ATTR(associated_cpus, ARM_CSPMU_ASSOCIATED_CPU_MASK),
	NULL,
};

static struct attribute_group arm_cspmu_cpumask_attr_group = {
	.attrs = arm_cspmu_cpumask_attrs,
};
static struct arm_cspmu_impl_match impl_match[] = {
	{
		.module_name	= "nvidia_cspmu",
		.pmiidr_val	= ARM_CSPMU_IMPL_ID_NVIDIA,
		.pmiidr_mask	= ARM_CSPMU_PMIIDR_IMPLEMENTER,
		.module		= NULL,
		.impl_init_ops	= NULL,
	},
	{
		.module_name	= "ampere_cspmu",
		.pmiidr_val	= ARM_CSPMU_IMPL_ID_AMPERE,
		.pmiidr_mask	= ARM_CSPMU_PMIIDR_IMPLEMENTER,
		.module		= NULL,
		.impl_init_ops	= NULL,
	},

	{0}
};
static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr)
{
	struct arm_cspmu_impl_match *match = impl_match;

	for (; match->pmiidr_val; match++) {
		u32 mask = match->pmiidr_mask;

		if ((match->pmiidr_val & mask) == (pmiidr & mask))
			return match;
	}

	return NULL;
}
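/*
 * Note: the impl_match[] entries above mask PMIIDR with
 * ARM_CSPMU_PMIIDR_IMPLEMENTER, so one entry covers every product ID from
 * that implementer; the terminating entry with pmiidr_val == 0 ends the walk.
 */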
#define DEFAULT_IMPL_OP(name)	.name = arm_cspmu_##name

static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
{
	int ret = 0;
	struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev);
	struct arm_cspmu_impl_match *match;

	/* Start with a default PMU implementation */
	cspmu->impl.module = THIS_MODULE;
	cspmu->impl.pmiidr = readl(cspmu->base0 + PMIIDR);
	cspmu->impl.ops = (struct arm_cspmu_impl_ops) {
		DEFAULT_IMPL_OP(get_event_attrs),
		DEFAULT_IMPL_OP(get_format_attrs),
		DEFAULT_IMPL_OP(get_identifier),
		DEFAULT_IMPL_OP(get_name),
		DEFAULT_IMPL_OP(is_cycle_counter_event),
		DEFAULT_IMPL_OP(event_type),
		DEFAULT_IMPL_OP(event_filter),
		DEFAULT_IMPL_OP(set_ev_filter),
		DEFAULT_IMPL_OP(event_attr_is_visible),
	};

	/* Firmware may override implementer/product ID from PMIIDR */
	if (apmt_node && apmt_node->impl_id)
		cspmu->impl.pmiidr = apmt_node->impl_id;

	/* Find implementer specific attribute ops. */
	match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr);

	/* Load implementer module and initialize the callbacks. */
	if (match) {
		mutex_lock(&arm_cspmu_lock);

		if (match->impl_init_ops) {
			/* Prevent unload until PMU registration is done. */
			if (try_module_get(match->module)) {
				cspmu->impl.module = match->module;
				cspmu->impl.match = match;
				ret = match->impl_init_ops(cspmu);
				if (ret)
					module_put(match->module);
			} else {
				WARN(1, "arm_cspmu failed to get module: %s\n",
				     match->module_name);
				ret = -EINVAL;
			}
		} else {
			request_module_nowait(match->module_name);
			ret = -EPROBE_DEFER;
		}

		mutex_unlock(&arm_cspmu_lock);
	}

	return ret;
}
static struct attribute_group *
arm_cspmu_alloc_event_attr_group(struct arm_cspmu *cspmu)
{
	struct attribute_group *event_group;
	struct device *dev = cspmu->dev;
	const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;

	event_group =
		devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL);
	if (!event_group)
		return NULL;

	event_group->name = "events";
	event_group->is_visible = impl_ops->event_attr_is_visible;
	event_group->attrs = impl_ops->get_event_attrs(cspmu);

	if (!event_group->attrs)
		return NULL;

	return event_group;
}

static struct attribute_group *
arm_cspmu_alloc_format_attr_group(struct arm_cspmu *cspmu)
{
	struct attribute_group *format_group;
	struct device *dev = cspmu->dev;

	format_group =
		devm_kzalloc(dev, sizeof(struct attribute_group), GFP_KERNEL);
	if (!format_group)
		return NULL;

	format_group->name = "format";
	format_group->attrs = cspmu->impl.ops.get_format_attrs(cspmu);

	if (!format_group->attrs)
		return NULL;

	return format_group;
}
static int arm_cspmu_alloc_attr_groups(struct arm_cspmu *cspmu)
{
	const struct attribute_group **attr_groups = cspmu->attr_groups;
	const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;

	cspmu->identifier = impl_ops->get_identifier(cspmu);
	cspmu->name = impl_ops->get_name(cspmu);

	if (!cspmu->identifier || !cspmu->name)
		return -ENOMEM;

	attr_groups[0] = arm_cspmu_alloc_event_attr_group(cspmu);
	attr_groups[1] = arm_cspmu_alloc_format_attr_group(cspmu);
	attr_groups[2] = &arm_cspmu_identifier_attr_group;
	attr_groups[3] = &arm_cspmu_cpumask_attr_group;

	if (!attr_groups[0] || !attr_groups[1])
		return -ENOMEM;

	return 0;
}
static inline void arm_cspmu_reset_counters(struct arm_cspmu *cspmu)
{
	writel(PMCR_C | PMCR_P, cspmu->base0 + PMCR);
}

static inline void arm_cspmu_start_counters(struct arm_cspmu *cspmu)
{
	writel(PMCR_E, cspmu->base0 + PMCR);
}

static inline void arm_cspmu_stop_counters(struct arm_cspmu *cspmu)
{
	writel(0, cspmu->base0 + PMCR);
}
static void arm_cspmu_enable(struct pmu *pmu)
{
	bool disabled;
	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);

	disabled = bitmap_empty(cspmu->hw_events.used_ctrs,
				cspmu->num_logical_ctrs);

	if (disabled)
		return;

	arm_cspmu_start_counters(cspmu);
}

static void arm_cspmu_disable(struct pmu *pmu)
{
	struct arm_cspmu *cspmu = to_arm_cspmu(pmu);

	arm_cspmu_stop_counters(cspmu);
}
static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
				   struct perf_event *event)
{
	int idx, ret;
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);

	if (supports_cycle_counter(cspmu)) {
		if (cspmu->impl.ops.is_cycle_counter_event(event)) {
			/* Search for available cycle counter. */
			if (test_and_set_bit(cspmu->cycle_counter_logical_idx,
					     hw_events->used_ctrs))
				return -EAGAIN;

			return cspmu->cycle_counter_logical_idx;
		}

		/*
		 * Search a regular counter from the used counter bitmap.
		 * The cycle counter divides the bitmap into two parts. Search
		 * the first then second half to exclude the cycle counter bit.
		 */
		idx = find_first_zero_bit(hw_events->used_ctrs,
					  cspmu->cycle_counter_logical_idx);
		if (idx >= cspmu->cycle_counter_logical_idx) {
			idx = find_next_zero_bit(
				hw_events->used_ctrs,
				cspmu->num_logical_ctrs,
				cspmu->cycle_counter_logical_idx + 1);
		}
	} else {
		idx = find_first_zero_bit(hw_events->used_ctrs,
					  cspmu->num_logical_ctrs);
	}

	if (idx >= cspmu->num_logical_ctrs)
		return -EAGAIN;

	if (cspmu->impl.ops.validate_event) {
		ret = cspmu->impl.ops.validate_event(cspmu, event);
		if (ret)
			return ret;
	}

	set_bit(idx, hw_events->used_ctrs);

	return idx;
}
static bool arm_cspmu_validate_event(struct pmu *pmu,
				     struct arm_cspmu_hw_events *hw_events,
				     struct perf_event *event)
{
	if (is_software_event(event))
		return true;

	/* Reject groups spanning multiple HW PMUs. */
	if (event->pmu != pmu)
		return false;

	return (arm_cspmu_get_event_idx(hw_events, event) >= 0);
}
/*
 * Make sure the group of events can be scheduled at once
 * on the PMU.
 */
static bool arm_cspmu_validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct arm_cspmu_hw_events fake_hw_events;

	if (event->group_leader == event)
		return true;

	memset(&fake_hw_events, 0, sizeof(fake_hw_events));

	if (!arm_cspmu_validate_event(event->pmu, &fake_hw_events, leader))
		return false;

	for_each_sibling_event(sibling, leader) {
		if (!arm_cspmu_validate_event(event->pmu, &fake_hw_events,
					      sibling))
			return false;
	}

	return arm_cspmu_validate_event(event->pmu, &fake_hw_events, event);
}
static int arm_cspmu_event_init(struct perf_event *event)
{
	struct arm_cspmu *cspmu;
	struct hw_perf_event *hwc = &event->hw;

	cspmu = to_arm_cspmu(event->pmu);

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * Following other "uncore" PMUs, we do not support sampling mode or
	 * attach to a task (per-process mode).
	 */
	if (is_sampling_event(event)) {
		dev_dbg(cspmu->pmu.dev,
			"Can't support sampling events\n");
		return -EOPNOTSUPP;
	}

	if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
		dev_dbg(cspmu->pmu.dev,
			"Can't support per-task counters\n");
		return -EINVAL;
	}

	/*
	 * Make sure the CPU assignment is on one of the CPUs associated with
	 * this PMU.
	 */
	if (!cpumask_test_cpu(event->cpu, &cspmu->associated_cpus)) {
		dev_dbg(cspmu->pmu.dev,
			"Requested cpu is not associated with the PMU\n");
		return -EINVAL;
	}

	/* Enforce the current active CPU to handle the events in this PMU. */
	event->cpu = cpumask_first(&cspmu->active_cpu);
	if (event->cpu >= nr_cpu_ids)
		return -EINVAL;

	if (!arm_cspmu_validate_group(event))
		return -EINVAL;

	/*
	 * The logical counter id is tracked with hw_perf_event.extra_reg.idx.
	 * The physical counter id is tracked with hw_perf_event.idx.
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put it
	 * yet.
	 */
	hwc->idx = -1;
	hwc->extra_reg.idx = -1;
	hwc->config = cspmu->impl.ops.event_type(event);

	return 0;
}
static inline u32 counter_offset(u32 reg_sz, u32 ctr_idx)
{
	return (PMEVCNTR_LO + (reg_sz * ctr_idx));
}
static void arm_cspmu_write_counter(struct perf_event *event, u64 val)
{
	u32 offset;
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);

	if (use_64b_counter_reg(cspmu)) {
		offset = counter_offset(sizeof(u64), event->hw.idx);

		if (cspmu->has_atomic_dword)
			writeq(val, cspmu->base1 + offset);
		else
			lo_hi_writeq(val, cspmu->base1 + offset);
	} else {
		offset = counter_offset(sizeof(u32), event->hw.idx);

		writel(lower_32_bits(val), cspmu->base1 + offset);
	}
}
static u64 arm_cspmu_read_counter(struct perf_event *event)
{
	u32 offset;
	const void __iomem *counter_addr;
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);

	if (use_64b_counter_reg(cspmu)) {
		offset = counter_offset(sizeof(u64), event->hw.idx);
		counter_addr = cspmu->base1 + offset;

		return cspmu->has_atomic_dword ?
			       readq(counter_addr) :
			       read_reg64_hilohi(counter_addr, HILOHI_MAX_POLL);
	}

	offset = counter_offset(sizeof(u32), event->hw.idx);
	return readl(cspmu->base1 + offset);
}
/*
 * arm_cspmu_set_event_period: Set the period for the counter.
 *
 * To handle cases of extreme interrupt latency, we program
 * the counter with half of the max count for the counters.
 */
static void arm_cspmu_set_event_period(struct perf_event *event)
{
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
	u64 val = counter_mask(cspmu) >> 1ULL;

	local64_set(&event->hw.prev_count, val);
	arm_cspmu_write_counter(event, val);
}
static void arm_cspmu_enable_counter(struct arm_cspmu *cspmu, int idx)
{
	u32 reg_id, reg_bit, inten_off, cnten_off;

	reg_id = COUNTER_TO_SET_CLR_ID(idx);
	reg_bit = COUNTER_TO_SET_CLR_BIT(idx);

	inten_off = PMINTENSET + (4 * reg_id);
	cnten_off = PMCNTENSET + (4 * reg_id);

	writel(BIT(reg_bit), cspmu->base0 + inten_off);
	writel(BIT(reg_bit), cspmu->base0 + cnten_off);
}

static void arm_cspmu_disable_counter(struct arm_cspmu *cspmu, int idx)
{
	u32 reg_id, reg_bit, inten_off, cnten_off;

	reg_id = COUNTER_TO_SET_CLR_ID(idx);
	reg_bit = COUNTER_TO_SET_CLR_BIT(idx);

	inten_off = PMINTENCLR + (4 * reg_id);
	cnten_off = PMCNTENCLR + (4 * reg_id);

	writel(BIT(reg_bit), cspmu->base0 + cnten_off);
	writel(BIT(reg_bit), cspmu->base0 + inten_off);
}
static void arm_cspmu_event_update(struct perf_event *event)
{
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 delta, prev, now;

	do {
		prev = local64_read(&hwc->prev_count);
		now = arm_cspmu_read_counter(event);
	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);

	delta = (now - prev) & counter_mask(cspmu);
	local64_add(delta, &event->count);
}
static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
				       struct hw_perf_event *hwc)
{
	u32 offset = PMEVTYPER + (4 * hwc->idx);

	writel(hwc->config, cspmu->base0 + offset);
}

static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
				    struct hw_perf_event *hwc,
				    u32 filter)
{
	u32 offset = PMEVFILTR + (4 * hwc->idx);

	writel(filter, cspmu->base0 + offset);
}

static inline void arm_cspmu_set_cc_filter(struct arm_cspmu *cspmu, u32 filter)
{
	u32 offset = PMCCFILTR;

	writel(filter, cspmu->base0 + offset);
}
static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
{
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u32 filter;

	/* We always reprogram the counter */
	if (pmu_flags & PERF_EF_RELOAD)
		WARN_ON(!(hwc->state & PERF_HES_UPTODATE));

	arm_cspmu_set_event_period(event);

	filter = cspmu->impl.ops.event_filter(event);

	if (event->hw.extra_reg.idx == cspmu->cycle_counter_logical_idx) {
		arm_cspmu_set_cc_filter(cspmu, filter);
	} else {
		arm_cspmu_set_event(cspmu, hwc);
		cspmu->impl.ops.set_ev_filter(cspmu, hwc, filter);
	}

	hwc->state = 0;

	arm_cspmu_enable_counter(cspmu, hwc->idx);
}
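/*
 * Note: the cycle counter is filtered through the dedicated PMCCFILTR
 * register and has no PMEVTYPER slot, so the start path above programs
 * either PMCCFILTR or the PMEVTYPER/PMEVFILTR pair, never both.
 */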
static void arm_cspmu_stop(struct perf_event *event, int pmu_flags)
{
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->state & PERF_HES_STOPPED)
		return;

	arm_cspmu_disable_counter(cspmu, hwc->idx);
	arm_cspmu_event_update(event);

	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
static inline u32 to_phys_idx(struct arm_cspmu *cspmu, u32 idx)
{
	return (idx == cspmu->cycle_counter_logical_idx) ?
		ARM_CSPMU_CYCLE_CNTR_IDX : idx;
}
static int arm_cspmu_add(struct perf_event *event, int flags)
{
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
	struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events;
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
					   &cspmu->associated_cpus)))
		return -ENOENT;

	idx = arm_cspmu_get_event_idx(hw_events, event);
	if (idx < 0)
		return idx;

	hw_events->events[idx] = event;
	hwc->idx = to_phys_idx(cspmu, idx);
	hwc->extra_reg.idx = idx;
	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	if (flags & PERF_EF_START)
		arm_cspmu_start(event, PERF_EF_RELOAD);

	/* Propagate changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}
static void arm_cspmu_del(struct perf_event *event, int flags)
{
	struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
	struct arm_cspmu_hw_events *hw_events = &cspmu->hw_events;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->extra_reg.idx;

	arm_cspmu_stop(event, PERF_EF_UPDATE);

	hw_events->events[idx] = NULL;

	clear_bit(idx, hw_events->used_ctrs);

	perf_event_update_userpage(event);
}
static void arm_cspmu_read(struct perf_event *event)
{
	arm_cspmu_event_update(event);
}
static struct arm_cspmu *arm_cspmu_alloc(struct platform_device *pdev)
{
	struct acpi_apmt_node *apmt_node;
	struct arm_cspmu *cspmu;
	struct device *dev = &pdev->dev;

	cspmu = devm_kzalloc(dev, sizeof(*cspmu), GFP_KERNEL);
	if (!cspmu)
		return NULL;

	cspmu->dev = dev;
	platform_set_drvdata(pdev, cspmu);

	apmt_node = arm_cspmu_apmt_node(dev);
	if (apmt_node) {
		cspmu->has_atomic_dword = apmt_node->flags & ACPI_APMT_FLAGS_ATOMIC;
	} else {
		u32 width = 0;

		device_property_read_u32(dev, "reg-io-width", &width);
		cspmu->has_atomic_dword = (width == 8);
	}

	return cspmu;
}
static int arm_cspmu_init_mmio(struct arm_cspmu *cspmu)
{
	struct device *dev;
	struct platform_device *pdev;

	dev = cspmu->dev;
	pdev = to_platform_device(dev);

	/* Base address for page 0. */
	cspmu->base0 = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(cspmu->base0)) {
		dev_err(dev, "ioremap failed for page-0 resource\n");
		return PTR_ERR(cspmu->base0);
	}

	/* Base address for page 1 if supported. Otherwise point to page 0. */
	cspmu->base1 = cspmu->base0;
	if (platform_get_resource(pdev, IORESOURCE_MEM, 1)) {
		cspmu->base1 = devm_platform_ioremap_resource(pdev, 1);
		if (IS_ERR(cspmu->base1)) {
			dev_err(dev, "ioremap failed for page-1 resource\n");
			return PTR_ERR(cspmu->base1);
		}
	}

	cspmu->pmcfgr = readl(cspmu->base0 + PMCFGR);

	cspmu->num_logical_ctrs = FIELD_GET(PMCFGR_N, cspmu->pmcfgr) + 1;

	cspmu->cycle_counter_logical_idx = ARM_CSPMU_MAX_HW_CNTRS;

	if (supports_cycle_counter(cspmu)) {
		/*
		 * The last logical counter is mapped to cycle counter if
		 * there is a gap between regular and cycle counter. Otherwise,
		 * logical and physical have 1-to-1 mapping.
		 */
		cspmu->cycle_counter_logical_idx =
			(cspmu->num_logical_ctrs <= ARM_CSPMU_CYCLE_CNTR_IDX) ?
				cspmu->num_logical_ctrs - 1 :
				ARM_CSPMU_CYCLE_CNTR_IDX;
	}

	cspmu->num_set_clr_reg =
		DIV_ROUND_UP(cspmu->num_logical_ctrs,
			     ARM_CSPMU_SET_CLR_COUNTER_NUM);

	cspmu->hw_events.events =
		devm_kcalloc(dev, cspmu->num_logical_ctrs,
			     sizeof(*cspmu->hw_events.events), GFP_KERNEL);

	if (!cspmu->hw_events.events)
		return -ENOMEM;

	return 0;
}
static inline int arm_cspmu_get_reset_overflow(struct arm_cspmu *cspmu,
					       u32 *pmovs)
{
	int i;
	u32 pmovclr_offset = PMOVSCLR;
	u32 has_overflowed = 0;

	for (i = 0; i < cspmu->num_set_clr_reg; ++i) {
		pmovs[i] = readl(cspmu->base1 + pmovclr_offset);
		has_overflowed |= pmovs[i];
		writel(pmovs[i], cspmu->base1 + pmovclr_offset);
		pmovclr_offset += sizeof(u32);
	}

	return has_overflowed != 0;
}
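/*
 * Note: PMOVSCLR is a write-one-to-clear register, so writing back the value
 * just read acknowledges exactly the overflow bits captured in pmovs[]
 * without disturbing overflows raised afterwards.
 */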
static irqreturn_t arm_cspmu_handle_irq(int irq_num, void *dev)
{
	int idx, has_overflowed;
	struct perf_event *event;
	struct arm_cspmu *cspmu = dev;
	DECLARE_BITMAP(pmovs, ARM_CSPMU_MAX_HW_CNTRS);
	bool handled = false;

	arm_cspmu_stop_counters(cspmu);

	has_overflowed = arm_cspmu_get_reset_overflow(cspmu, (u32 *)pmovs);
	if (!has_overflowed)
		goto done;

	for_each_set_bit(idx, cspmu->hw_events.used_ctrs,
			 cspmu->num_logical_ctrs) {
		event = cspmu->hw_events.events[idx];

		if (!event)
			continue;

		if (!test_bit(event->hw.idx, pmovs))
			continue;

		arm_cspmu_event_update(event);
		arm_cspmu_set_event_period(event);

		handled = true;
	}

done:
	arm_cspmu_start_counters(cspmu);
	return IRQ_RETVAL(handled);
}
static int arm_cspmu_request_irq(struct arm_cspmu *cspmu)
{
	int irq, ret;
	struct device *dev;
	struct platform_device *pdev;

	dev = cspmu->dev;
	pdev = to_platform_device(dev);

	/* Skip IRQ request if the PMU does not support overflow interrupt. */
	irq = platform_get_irq_optional(pdev, 0);
	if (irq < 0)
		return irq == -ENXIO ? 0 : irq;

	ret = devm_request_irq(dev, irq, arm_cspmu_handle_irq,
			       IRQF_NOBALANCING | IRQF_NO_THREAD, dev_name(dev),
			       cspmu);
	if (ret) {
		dev_err(dev, "Could not request IRQ %d\n", irq);
		return ret;
	}

	cspmu->irq = irq;

	return 0;
}
#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
#include <acpi/processor.h>

static inline int arm_cspmu_find_cpu_container(int cpu, u32 container_uid)
{
	struct device *cpu_dev;
	struct acpi_device *acpi_dev;

	cpu_dev = get_cpu_device(cpu);
	if (!cpu_dev)
		return -ENODEV;

	acpi_dev = ACPI_COMPANION(cpu_dev);
	while (acpi_dev) {
		if (acpi_dev_hid_uid_match(acpi_dev, ACPI_PROCESSOR_CONTAINER_HID, container_uid))
			return 0;

		acpi_dev = acpi_dev_parent(acpi_dev);
	}

	return -ENODEV;
}
static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
{
	struct acpi_apmt_node *apmt_node;
	int affinity_flag;
	int cpu;

	apmt_node = arm_cspmu_apmt_node(cspmu->dev);
	affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY;

	if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) {
		for_each_possible_cpu(cpu) {
			if (apmt_node->proc_affinity ==
			    get_acpi_id_for_cpu(cpu)) {
				cpumask_set_cpu(cpu, &cspmu->associated_cpus);
				break;
			}
		}
	} else {
		for_each_possible_cpu(cpu) {
			if (arm_cspmu_find_cpu_container(
				    cpu, apmt_node->proc_affinity))
				continue;

			cpumask_set_cpu(cpu, &cspmu->associated_cpus);
		}
	}

	return 0;
}
#else
static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
{
	return -ENODEV;
}
#endif
static int arm_cspmu_of_get_cpus(struct arm_cspmu *cspmu)
{
	struct of_phandle_iterator it;
	int ret, cpu;

	of_for_each_phandle(&it, ret, dev_of_node(cspmu->dev), "cpus", NULL, 0) {
		cpu = of_cpu_node_to_id(it.node);
		if (cpu < 0)
			continue;
		cpumask_set_cpu(cpu, &cspmu->associated_cpus);
	}
	return ret == -ENOENT ? 0 : ret;
}
static int arm_cspmu_get_cpus(struct arm_cspmu *cspmu)
{
	int ret = 0;

	if (arm_cspmu_apmt_node(cspmu->dev))
		ret = arm_cspmu_acpi_get_cpus(cspmu);
	else if (device_property_present(cspmu->dev, "cpus"))
		ret = arm_cspmu_of_get_cpus(cspmu);
	else
		cpumask_copy(&cspmu->associated_cpus, cpu_possible_mask);

	if (!ret && cpumask_empty(&cspmu->associated_cpus)) {
		dev_dbg(cspmu->dev, "No cpu associated with the PMU\n");
		ret = -ENODEV;
	}

	return ret;
}
static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
{
	int ret, capabilities;

	ret = arm_cspmu_alloc_attr_groups(cspmu);
	if (ret)
		return ret;

	ret = cpuhp_state_add_instance(arm_cspmu_cpuhp_state,
				       &cspmu->cpuhp_node);
	if (ret)
		return ret;

	capabilities = PERF_PMU_CAP_NO_EXCLUDE;
	if (cspmu->irq == 0)
		capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

	cspmu->pmu = (struct pmu){
		.task_ctx_nr	= perf_invalid_context,
		.module		= cspmu->impl.module,
		.parent		= cspmu->dev,
		.pmu_enable	= arm_cspmu_enable,
		.pmu_disable	= arm_cspmu_disable,
		.event_init	= arm_cspmu_event_init,
		.add		= arm_cspmu_add,
		.del		= arm_cspmu_del,
		.start		= arm_cspmu_start,
		.stop		= arm_cspmu_stop,
		.read		= arm_cspmu_read,
		.attr_groups	= cspmu->attr_groups,
		.capabilities	= capabilities,
	};

	/* Hardware counter init */
	arm_cspmu_reset_counters(cspmu);

	ret = perf_pmu_register(&cspmu->pmu, cspmu->name, -1);
	if (ret) {
		cpuhp_state_remove_instance(arm_cspmu_cpuhp_state,
					    &cspmu->cpuhp_node);
	}

	return ret;
}
static int arm_cspmu_device_probe(struct platform_device *pdev)
{
	int ret;
	struct arm_cspmu *cspmu;

	cspmu = arm_cspmu_alloc(pdev);
	if (!cspmu)
		return -ENOMEM;

	ret = arm_cspmu_init_mmio(cspmu);
	if (ret)
		return ret;

	ret = arm_cspmu_request_irq(cspmu);
	if (ret)
		return ret;

	ret = arm_cspmu_get_cpus(cspmu);
	if (ret)
		return ret;

	ret = arm_cspmu_init_impl_ops(cspmu);
	if (ret)
		return ret;

	ret = arm_cspmu_register_pmu(cspmu);

	/* Matches arm_cspmu_init_impl_ops() above. */
	if (cspmu->impl.module != THIS_MODULE)
		module_put(cspmu->impl.module);

	return ret;
}
static void arm_cspmu_device_remove(struct platform_device *pdev)
{
	struct arm_cspmu *cspmu = platform_get_drvdata(pdev);

	perf_pmu_unregister(&cspmu->pmu);
	cpuhp_state_remove_instance(arm_cspmu_cpuhp_state, &cspmu->cpuhp_node);
}
static const struct platform_device_id arm_cspmu_id[] = {
	{ DRVNAME, 0 },
	{ },
};
MODULE_DEVICE_TABLE(platform, arm_cspmu_id);

static const struct of_device_id arm_cspmu_of_match[] = {
	{ .compatible = "arm,coresight-pmu" },
	{}
};
MODULE_DEVICE_TABLE(of, arm_cspmu_of_match);

static struct platform_driver arm_cspmu_driver = {
	.driver = {
		.name = DRVNAME,
		.of_match_table = arm_cspmu_of_match,
		.suppress_bind_attrs = true,
	},
	.probe = arm_cspmu_device_probe,
	.remove = arm_cspmu_device_remove,
	.id_table = arm_cspmu_id,
};
static void arm_cspmu_set_active_cpu(int cpu, struct arm_cspmu *cspmu)
{
	cpumask_set_cpu(cpu, &cspmu->active_cpu);

	WARN_ON(irq_set_affinity(cspmu->irq, &cspmu->active_cpu));
}
static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct arm_cspmu *cspmu =
		hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);

	if (!cpumask_test_cpu(cpu, &cspmu->associated_cpus))
		return 0;

	/* If the PMU is already managed, there is nothing to do */
	if (!cpumask_empty(&cspmu->active_cpu))
		return 0;

	/* Use this CPU for event counting */
	arm_cspmu_set_active_cpu(cpu, cspmu);

	return 0;
}
static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
	unsigned int dst;

	struct arm_cspmu *cspmu =
		hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);

	/* Nothing to do if this CPU doesn't own the PMU */
	if (!cpumask_test_and_clear_cpu(cpu, &cspmu->active_cpu))
		return 0;

	/* Choose a new CPU to migrate ownership of the PMU to */
	dst = cpumask_any_and_but(&cspmu->associated_cpus,
				  cpu_online_mask, cpu);
	if (dst >= nr_cpu_ids)
		return 0;

	/* Use this CPU for event counting */
	perf_pmu_migrate_context(&cspmu->pmu, cpu, dst);
	arm_cspmu_set_active_cpu(dst, cspmu);

	return 0;
}
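/*
 * Note: uncore events are always counted from the single active CPU, so when
 * that CPU is hotplugged out the perf context is migrated to another online
 * CPU in associated_cpus (if any) and arm_cspmu_set_active_cpu() re-targets
 * the overflow IRQ affinity, letting counting continue transparently.
 */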
static int __init arm_cspmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/arm/cspmu:online",
				      arm_cspmu_cpu_online,
				      arm_cspmu_cpu_teardown);
	if (ret < 0)
		return ret;
	arm_cspmu_cpuhp_state = ret;
	return platform_driver_register(&arm_cspmu_driver);
}

static void __exit arm_cspmu_exit(void)
{
	platform_driver_unregister(&arm_cspmu_driver);
	cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
}
int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match)
{
	struct arm_cspmu_impl_match *match;
	int ret = 0;

	match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);

	if (match) {
		mutex_lock(&arm_cspmu_lock);

		if (!match->impl_init_ops) {
			match->module = impl_match->module;
			match->impl_init_ops = impl_match->impl_init_ops;
		} else {
			/* Broken match table may contain non-unique entries */
			WARN(1, "arm_cspmu backend already registered for module: %s, pmiidr: 0x%x, mask: 0x%x\n",
			     match->module_name,
			     match->pmiidr_val,
			     match->pmiidr_mask);

			ret = -EINVAL;
		}

		mutex_unlock(&arm_cspmu_lock);

		if (!ret)
			ret = driver_attach(&arm_cspmu_driver.driver);
	} else {
		pr_err("arm_cspmu reg failed, unable to find a match for pmiidr: 0x%x\n",
		       impl_match->pmiidr_val);

		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
static int arm_cspmu_match_device(struct device *dev, const void *match)
{
	struct arm_cspmu *cspmu = platform_get_drvdata(to_platform_device(dev));

	return (cspmu && cspmu->impl.match == match) ? 1 : 0;
}
void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match)
{
	struct device *dev;
	struct arm_cspmu_impl_match *match;

	match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);

	if (WARN_ON(!match))
		return;

	/* Unbind the driver from all matching backend devices. */
	while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL,
					 match, arm_cspmu_match_device)))
		device_release_driver(dev);

	mutex_lock(&arm_cspmu_lock);

	match->module = NULL;
	match->impl_init_ops = NULL;

	mutex_unlock(&arm_cspmu_lock);
}
EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
module_init(arm_cspmu_init);
module_exit(arm_cspmu_exit);

MODULE_DESCRIPTION("ARM CoreSight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");