// SPDX-License-Identifier: GPL-2.0-only
/*
 * Zhaoxin PMU; like Intel Architectural PerfMon-v2
 */
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8 #include <linux/stddef.h>
9 #include <linux/types.h>
10 #include <linux/init.h>
11 #include <linux/slab.h>
12 #include <linux/export.h>
13 #include <linux/nmi.h>
15 #include <asm/cpufeature.h>
16 #include <asm/hardirq.h>
19 #include "../perf_event.h"
22 * Zhaoxin PerfMon, used on zxc and later.
24 static u64 zx_pmon_event_map
[PERF_COUNT_HW_MAX
] __read_mostly
= {
26 [PERF_COUNT_HW_CPU_CYCLES
] = 0x0082,
27 [PERF_COUNT_HW_INSTRUCTIONS
] = 0x00c0,
28 [PERF_COUNT_HW_CACHE_REFERENCES
] = 0x0515,
29 [PERF_COUNT_HW_CACHE_MISSES
] = 0x051a,
30 [PERF_COUNT_HW_BUS_CYCLES
] = 0x0083,
33 static struct event_constraint zxc_event_constraints
[] __read_mostly
= {
35 FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
39 static struct event_constraint zxd_event_constraints
[] __read_mostly
= {
41 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */
42 FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
43 FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */
47 static __initconst
const u64 zxd_hw_cache_event_ids
48 [PERF_COUNT_HW_CACHE_MAX
]
49 [PERF_COUNT_HW_CACHE_OP_MAX
]
50 [PERF_COUNT_HW_CACHE_RESULT_MAX
] = {
53 [C(RESULT_ACCESS
)] = 0x0042,
54 [C(RESULT_MISS
)] = 0x0538,
57 [C(RESULT_ACCESS
)] = 0x0043,
58 [C(RESULT_MISS
)] = 0x0562,
61 [C(RESULT_ACCESS
)] = -1,
62 [C(RESULT_MISS
)] = -1,
67 [C(RESULT_ACCESS
)] = 0x0300,
68 [C(RESULT_MISS
)] = 0x0301,
71 [C(RESULT_ACCESS
)] = -1,
72 [C(RESULT_MISS
)] = -1,
75 [C(RESULT_ACCESS
)] = 0x030a,
76 [C(RESULT_MISS
)] = 0x030b,
81 [C(RESULT_ACCESS
)] = -1,
82 [C(RESULT_MISS
)] = -1,
85 [C(RESULT_ACCESS
)] = -1,
86 [C(RESULT_MISS
)] = -1,
89 [C(RESULT_ACCESS
)] = -1,
90 [C(RESULT_MISS
)] = -1,
95 [C(RESULT_ACCESS
)] = 0x0042,
96 [C(RESULT_MISS
)] = 0x052c,
99 [C(RESULT_ACCESS
)] = 0x0043,
100 [C(RESULT_MISS
)] = 0x0530,
103 [C(RESULT_ACCESS
)] = 0x0564,
104 [C(RESULT_MISS
)] = 0x0565,
109 [C(RESULT_ACCESS
)] = 0x00c0,
110 [C(RESULT_MISS
)] = 0x0534,
113 [C(RESULT_ACCESS
)] = -1,
114 [C(RESULT_MISS
)] = -1,
117 [C(RESULT_ACCESS
)] = -1,
118 [C(RESULT_MISS
)] = -1,
123 [C(RESULT_ACCESS
)] = 0x0700,
124 [C(RESULT_MISS
)] = 0x0709,
127 [C(RESULT_ACCESS
)] = -1,
128 [C(RESULT_MISS
)] = -1,
131 [C(RESULT_ACCESS
)] = -1,
132 [C(RESULT_MISS
)] = -1,
137 [C(RESULT_ACCESS
)] = -1,
138 [C(RESULT_MISS
)] = -1,
141 [C(RESULT_ACCESS
)] = -1,
142 [C(RESULT_MISS
)] = -1,
145 [C(RESULT_ACCESS
)] = -1,
146 [C(RESULT_MISS
)] = -1,
151 static __initconst
const u64 zxe_hw_cache_event_ids
152 [PERF_COUNT_HW_CACHE_MAX
]
153 [PERF_COUNT_HW_CACHE_OP_MAX
]
154 [PERF_COUNT_HW_CACHE_RESULT_MAX
] = {
157 [C(RESULT_ACCESS
)] = 0x0568,
158 [C(RESULT_MISS
)] = 0x054b,
161 [C(RESULT_ACCESS
)] = 0x0669,
162 [C(RESULT_MISS
)] = 0x0562,
165 [C(RESULT_ACCESS
)] = -1,
166 [C(RESULT_MISS
)] = -1,
171 [C(RESULT_ACCESS
)] = 0x0300,
172 [C(RESULT_MISS
)] = 0x0301,
175 [C(RESULT_ACCESS
)] = -1,
176 [C(RESULT_MISS
)] = -1,
179 [C(RESULT_ACCESS
)] = 0x030a,
180 [C(RESULT_MISS
)] = 0x030b,
185 [C(RESULT_ACCESS
)] = 0x0,
186 [C(RESULT_MISS
)] = 0x0,
189 [C(RESULT_ACCESS
)] = 0x0,
190 [C(RESULT_MISS
)] = 0x0,
193 [C(RESULT_ACCESS
)] = 0x0,
194 [C(RESULT_MISS
)] = 0x0,
199 [C(RESULT_ACCESS
)] = 0x0568,
200 [C(RESULT_MISS
)] = 0x052c,
203 [C(RESULT_ACCESS
)] = 0x0669,
204 [C(RESULT_MISS
)] = 0x0530,
207 [C(RESULT_ACCESS
)] = 0x0564,
208 [C(RESULT_MISS
)] = 0x0565,
213 [C(RESULT_ACCESS
)] = 0x00c0,
214 [C(RESULT_MISS
)] = 0x0534,
217 [C(RESULT_ACCESS
)] = -1,
218 [C(RESULT_MISS
)] = -1,
221 [C(RESULT_ACCESS
)] = -1,
222 [C(RESULT_MISS
)] = -1,
227 [C(RESULT_ACCESS
)] = 0x0028,
228 [C(RESULT_MISS
)] = 0x0029,
231 [C(RESULT_ACCESS
)] = -1,
232 [C(RESULT_MISS
)] = -1,
235 [C(RESULT_ACCESS
)] = -1,
236 [C(RESULT_MISS
)] = -1,
241 [C(RESULT_ACCESS
)] = -1,
242 [C(RESULT_MISS
)] = -1,
245 [C(RESULT_ACCESS
)] = -1,
246 [C(RESULT_MISS
)] = -1,
249 [C(RESULT_ACCESS
)] = -1,
250 [C(RESULT_MISS
)] = -1,
255 static void zhaoxin_pmu_disable_all(void)
257 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL
, 0);
260 static void zhaoxin_pmu_enable_all(int added
)
262 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL
, x86_pmu
.intel_ctrl
);
265 static inline u64
zhaoxin_pmu_get_status(void)
269 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS
, status
);
274 static inline void zhaoxin_pmu_ack_status(u64 ack
)
276 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL
, ack
);
279 static inline void zxc_pmu_ack_status(u64 ack
)
282 * ZXC needs global control enabled in order to clear status bits.
284 zhaoxin_pmu_enable_all(0);
285 zhaoxin_pmu_ack_status(ack
);
286 zhaoxin_pmu_disable_all();
289 static void zhaoxin_pmu_disable_fixed(struct hw_perf_event
*hwc
)
291 int idx
= hwc
->idx
- INTEL_PMC_IDX_FIXED
;
294 mask
= 0xfULL
<< (idx
* 4);
296 rdmsrl(hwc
->config_base
, ctrl_val
);
298 wrmsrl(hwc
->config_base
, ctrl_val
);
301 static void zhaoxin_pmu_disable_event(struct perf_event
*event
)
303 struct hw_perf_event
*hwc
= &event
->hw
;
305 if (unlikely(hwc
->config_base
== MSR_ARCH_PERFMON_FIXED_CTR_CTRL
)) {
306 zhaoxin_pmu_disable_fixed(hwc
);
310 x86_pmu_disable_event(event
);
313 static void zhaoxin_pmu_enable_fixed(struct hw_perf_event
*hwc
)
315 int idx
= hwc
->idx
- INTEL_PMC_IDX_FIXED
;
316 u64 ctrl_val
, bits
, mask
;
319 * Enable IRQ generation (0x8),
320 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
324 if (hwc
->config
& ARCH_PERFMON_EVENTSEL_USR
)
326 if (hwc
->config
& ARCH_PERFMON_EVENTSEL_OS
)
330 mask
= 0xfULL
<< (idx
* 4);
332 rdmsrl(hwc
->config_base
, ctrl_val
);
335 wrmsrl(hwc
->config_base
, ctrl_val
);
338 static void zhaoxin_pmu_enable_event(struct perf_event
*event
)
340 struct hw_perf_event
*hwc
= &event
->hw
;
342 if (unlikely(hwc
->config_base
== MSR_ARCH_PERFMON_FIXED_CTR_CTRL
)) {
343 zhaoxin_pmu_enable_fixed(hwc
);
347 __x86_pmu_enable_event(hwc
, ARCH_PERFMON_EVENTSEL_ENABLE
);
351 * This handler is triggered by the local APIC, so the APIC IRQ handling
354 static int zhaoxin_pmu_handle_irq(struct pt_regs
*regs
)
356 struct perf_sample_data data
;
357 struct cpu_hw_events
*cpuc
;
362 cpuc
= this_cpu_ptr(&cpu_hw_events
);
363 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
364 zhaoxin_pmu_disable_all();
365 status
= zhaoxin_pmu_get_status();
370 if (x86_pmu
.enabled_ack
)
371 zxc_pmu_ack_status(status
);
373 zhaoxin_pmu_ack_status(status
);
375 inc_irq_stat(apic_perf_irqs
);
378 * CondChgd bit 63 doesn't mean any overflow status. Ignore
381 if (__test_and_clear_bit(63, (unsigned long *)&status
)) {
386 for_each_set_bit(bit
, (unsigned long *)&status
, X86_PMC_IDX_MAX
) {
387 struct perf_event
*event
= cpuc
->events
[bit
];
391 if (!test_bit(bit
, cpuc
->active_mask
))
394 x86_perf_event_update(event
);
395 perf_sample_data_init(&data
, 0, event
->hw
.last_period
);
397 if (!x86_perf_event_set_period(event
))
400 if (perf_event_overflow(event
, &data
, regs
))
401 x86_pmu_stop(event
, 0);
405 * Repeat if there is more work to be done:
407 status
= zhaoxin_pmu_get_status();
412 zhaoxin_pmu_enable_all(0);
416 static u64
zhaoxin_pmu_event_map(int hw_event
)
418 return zx_pmon_event_map
[hw_event
];
421 static struct event_constraint
*
422 zhaoxin_get_event_constraints(struct cpu_hw_events
*cpuc
, int idx
,
423 struct perf_event
*event
)
425 struct event_constraint
*c
;
427 if (x86_pmu
.event_constraints
) {
428 for_each_event_constraint(c
, x86_pmu
.event_constraints
) {
429 if ((event
->hw
.config
& c
->cmask
) == c
->code
)
434 return &unconstrained
;
437 PMU_FORMAT_ATTR(event
, "config:0-7");
438 PMU_FORMAT_ATTR(umask
, "config:8-15");
439 PMU_FORMAT_ATTR(edge
, "config:18");
440 PMU_FORMAT_ATTR(inv
, "config:23");
441 PMU_FORMAT_ATTR(cmask
, "config:24-31");
443 static struct attribute
*zx_arch_formats_attr
[] = {
444 &format_attr_event
.attr
,
445 &format_attr_umask
.attr
,
446 &format_attr_edge
.attr
,
447 &format_attr_inv
.attr
,
448 &format_attr_cmask
.attr
,
452 static ssize_t
zhaoxin_event_sysfs_show(char *page
, u64 config
)
454 u64 event
= (config
& ARCH_PERFMON_EVENTSEL_EVENT
);
456 return x86_event_sysfs_show(page
, config
, event
);
459 static const struct x86_pmu zhaoxin_pmu __initconst
= {
461 .handle_irq
= zhaoxin_pmu_handle_irq
,
462 .disable_all
= zhaoxin_pmu_disable_all
,
463 .enable_all
= zhaoxin_pmu_enable_all
,
464 .enable
= zhaoxin_pmu_enable_event
,
465 .disable
= zhaoxin_pmu_disable_event
,
466 .hw_config
= x86_pmu_hw_config
,
467 .schedule_events
= x86_schedule_events
,
468 .eventsel
= MSR_ARCH_PERFMON_EVENTSEL0
,
469 .perfctr
= MSR_ARCH_PERFMON_PERFCTR0
,
470 .event_map
= zhaoxin_pmu_event_map
,
471 .max_events
= ARRAY_SIZE(zx_pmon_event_map
),
474 * For zxd/zxe, read/write operation for PMCx MSR is 48 bits.
476 .max_period
= (1ULL << 47) - 1,
477 .get_event_constraints
= zhaoxin_get_event_constraints
,
479 .format_attrs
= zx_arch_formats_attr
,
480 .events_sysfs_show
= zhaoxin_event_sysfs_show
,
/*
 * Generic event id and human-readable name for each bit position of
 * x86_pmu.events_mask; zhaoxin_arch_events_quirk() indexes this table
 * by bit number, so the order of the entries is significant.
 */
static const struct { int id; char *name; } zx_arch_events_map[] __initconst = {
	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

493 static __init
void zhaoxin_arch_events_quirk(void)
497 /* disable event that reported as not presend by cpuid */
498 for_each_set_bit(bit
, x86_pmu
.events_mask
, ARRAY_SIZE(zx_arch_events_map
)) {
499 zx_pmon_event_map
[zx_arch_events_map
[bit
].id
] = 0;
500 pr_warn("CPUID marked event: \'%s\' unavailable\n",
501 zx_arch_events_map
[bit
].name
);
505 __init
int zhaoxin_pmu_init(void)
507 union cpuid10_edx edx
;
508 union cpuid10_eax eax
;
509 union cpuid10_ebx ebx
;
510 struct event_constraint
*c
;
514 pr_info("Welcome to zhaoxin pmu!\n");
517 * Check whether the Architectural PerfMon supports
520 cpuid(10, &eax
.full
, &ebx
.full
, &unused
, &edx
.full
);
522 if (eax
.split
.mask_length
< ARCH_PERFMON_EVENTS_COUNT
- 1)
525 version
= eax
.split
.version_id
;
529 x86_pmu
= zhaoxin_pmu
;
530 pr_info("Version check pass!\n");
532 x86_pmu
.version
= version
;
533 x86_pmu
.num_counters
= eax
.split
.num_counters
;
534 x86_pmu
.cntval_bits
= eax
.split
.bit_width
;
535 x86_pmu
.cntval_mask
= (1ULL << eax
.split
.bit_width
) - 1;
536 x86_pmu
.events_maskl
= ebx
.full
;
537 x86_pmu
.events_mask_len
= eax
.split
.mask_length
;
539 x86_pmu
.num_counters_fixed
= edx
.split
.num_counters_fixed
;
540 x86_add_quirk(zhaoxin_arch_events_quirk
);
542 switch (boot_cpu_data
.x86
) {
544 if (boot_cpu_data
.x86_model
== 0x0f || boot_cpu_data
.x86_model
== 0x19) {
546 x86_pmu
.max_period
= x86_pmu
.cntval_mask
>> 1;
548 /* Clearing status works only if the global control is enable on zxc. */
549 x86_pmu
.enabled_ack
= 1;
551 x86_pmu
.event_constraints
= zxc_event_constraints
;
552 zx_pmon_event_map
[PERF_COUNT_HW_INSTRUCTIONS
] = 0;
553 zx_pmon_event_map
[PERF_COUNT_HW_CACHE_REFERENCES
] = 0;
554 zx_pmon_event_map
[PERF_COUNT_HW_CACHE_MISSES
] = 0;
555 zx_pmon_event_map
[PERF_COUNT_HW_BUS_CYCLES
] = 0;
557 pr_cont("ZXC events, ");
563 zx_pmon_event_map
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
] =
564 X86_CONFIG(.event
= 0x01, .umask
= 0x01, .inv
= 0x01, .cmask
= 0x01);
566 zx_pmon_event_map
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND
] =
567 X86_CONFIG(.event
= 0x0f, .umask
= 0x04, .inv
= 0, .cmask
= 0);
569 switch (boot_cpu_data
.x86_model
) {
571 memcpy(hw_cache_event_ids
, zxd_hw_cache_event_ids
,
572 sizeof(hw_cache_event_ids
));
574 x86_pmu
.event_constraints
= zxd_event_constraints
;
576 zx_pmon_event_map
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS
] = 0x0700;
577 zx_pmon_event_map
[PERF_COUNT_HW_BRANCH_MISSES
] = 0x0709;
579 pr_cont("ZXD events, ");
582 memcpy(hw_cache_event_ids
, zxe_hw_cache_event_ids
,
583 sizeof(hw_cache_event_ids
));
585 x86_pmu
.event_constraints
= zxd_event_constraints
;
587 zx_pmon_event_map
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS
] = 0x0028;
588 zx_pmon_event_map
[PERF_COUNT_HW_BRANCH_MISSES
] = 0x0029;
590 pr_cont("ZXE events, ");
601 x86_pmu
.intel_ctrl
= (1 << (x86_pmu
.num_counters
)) - 1;
602 x86_pmu
.intel_ctrl
|= ((1LL << x86_pmu
.num_counters_fixed
)-1) << INTEL_PMC_IDX_FIXED
;
604 if (x86_pmu
.event_constraints
) {
605 for_each_event_constraint(c
, x86_pmu
.event_constraints
) {
606 c
->idxmsk64
|= (1ULL << x86_pmu
.num_counters
) - 1;
607 c
->weight
+= x86_pmu
.num_counters
;