1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Performance counter support for POWER10 processors.
5 * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
6 * Copyright 2020 Athira Rajeev, IBM Corporation.
9 #define pr_fmt(fmt) "power10-pmu: " fmt
11 #include "isa207-common.h"
14 * Raw event encoding for Power10:
16 * 60 56 52 48 44 40 36 32
17 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
18 * | | [ ] [ src_match ] [ src_mask ] | [ ] [ l2l3_sel ] [ thresh_ctl ]
20 * | | *- IFM (Linux) | | thresh start/stop -*
21 * | *- BHRB (Linux) | src_sel
22 * *- EBB (Linux) *invert_bit
24 * 28 24 20 16 12 8 4 0
25 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
26 * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] | m [ pmcxsel ]
29 * | | | *- L1/L2/L3 cache_sel | |*-radix_scope_qual
31 * | *- sampling mode for marked events *- combine
35 * Below uses IBM bit numbering.
37 * MMCR1[x:y] = unit (PMCxUNIT)
38 * MMCR1[24] = pmc1combine[0]
39 * MMCR1[25] = pmc1combine[1]
40 * MMCR1[26] = pmc2combine[0]
41 * MMCR1[27] = pmc2combine[1]
42 * MMCR1[28] = pmc3combine[0]
43 * MMCR1[29] = pmc3combine[1]
44 * MMCR1[30] = pmc4combine[0]
45 * MMCR1[31] = pmc4combine[1]
47 * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
48 * MMCR1[20:27] = thresh_ctl
49 * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
50 * MMCR1[20:27] = thresh_ctl
52 * MMCRA[48:55] = thresh_ctl (THRESH START/END)
55 * MMCRA[45:47] = thresh_sel
58 * MMCR2[56:60] = l2l3_sel[0:4]
60 * MMCR1[16] = cache_sel[0]
61 * MMCR1[17] = cache_sel[1]
62 * MMCR1[18] = radix_scope_qual
65 * MMCRA[63] = 1 (SAMPLE_ENABLE)
66 * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG)
67 * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE)
72 * MMCRA[SDAR_MODE] = sdar_mode[0:1]
76 * Some power10 event codes.
78 #define EVENT(_name, _code) enum{_name = _code}
80 #include "power10-events-list.h"
84 /* MMCRA IFM bits - POWER10 */
85 #define POWER10_MMCRA_IFM1 0x0000000040000000UL
86 #define POWER10_MMCRA_IFM2 0x0000000080000000UL
87 #define POWER10_MMCRA_IFM3 0x00000000C0000000UL
88 #define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL
90 extern u64 PERF_REG_EXTENDED_MASK
;
92 /* Table of alternatives, sorted by column 0 */
93 static const unsigned int power10_event_alternatives
[][MAX_ALT
] = {
94 { PM_RUN_CYC_ALT
, PM_RUN_CYC
},
95 { PM_RUN_INST_CMPL_ALT
, PM_RUN_INST_CMPL
},
98 static int power10_get_alternatives(u64 event
, unsigned int flags
, u64 alt
[])
102 num_alt
= isa207_get_alternatives(event
, alt
,
103 ARRAY_SIZE(power10_event_alternatives
), flags
,
104 power10_event_alternatives
);
109 GENERIC_EVENT_ATTR(cpu
-cycles
, PM_RUN_CYC
);
110 GENERIC_EVENT_ATTR(instructions
, PM_RUN_INST_CMPL
);
111 GENERIC_EVENT_ATTR(branch
-instructions
, PM_BR_CMPL
);
112 GENERIC_EVENT_ATTR(branch
-misses
, PM_BR_MPRED_CMPL
);
113 GENERIC_EVENT_ATTR(cache
-references
, PM_LD_REF_L1
);
114 GENERIC_EVENT_ATTR(cache
-misses
, PM_LD_MISS_L1
);
115 GENERIC_EVENT_ATTR(mem
-loads
, MEM_LOADS
);
116 GENERIC_EVENT_ATTR(mem
-stores
, MEM_STORES
);
117 GENERIC_EVENT_ATTR(branch
-instructions
, PM_BR_FIN
);
118 GENERIC_EVENT_ATTR(branch
-misses
, PM_MPRED_BR_FIN
);
119 GENERIC_EVENT_ATTR(cache
-misses
, PM_LD_DEMAND_MISS_L1_FIN
);
121 CACHE_EVENT_ATTR(L1
-dcache
-load
-misses
, PM_LD_MISS_L1
);
122 CACHE_EVENT_ATTR(L1
-dcache
-loads
, PM_LD_REF_L1
);
123 CACHE_EVENT_ATTR(L1
-dcache
-prefetches
, PM_LD_PREFETCH_CACHE_LINE_MISS
);
124 CACHE_EVENT_ATTR(L1
-dcache
-store
-misses
, PM_ST_MISS_L1
);
125 CACHE_EVENT_ATTR(L1
-icache
-load
-misses
, PM_L1_ICACHE_MISS
);
126 CACHE_EVENT_ATTR(L1
-icache
-loads
, PM_INST_FROM_L1
);
127 CACHE_EVENT_ATTR(L1
-icache
-prefetches
, PM_IC_PREF_REQ
);
128 CACHE_EVENT_ATTR(LLC
-load
-misses
, PM_DATA_FROM_L3MISS
);
129 CACHE_EVENT_ATTR(LLC
-loads
, PM_DATA_FROM_L3
);
130 CACHE_EVENT_ATTR(LLC
-prefetches
, PM_L3_PF_MISS_L3
);
131 CACHE_EVENT_ATTR(LLC
-store
-misses
, PM_L2_ST_MISS
);
132 CACHE_EVENT_ATTR(LLC
-stores
, PM_L2_ST
);
133 CACHE_EVENT_ATTR(branch
-load
-misses
, PM_BR_MPRED_CMPL
);
134 CACHE_EVENT_ATTR(branch
-loads
, PM_BR_CMPL
);
135 CACHE_EVENT_ATTR(dTLB
-load
-misses
, PM_DTLB_MISS
);
136 CACHE_EVENT_ATTR(iTLB
-load
-misses
, PM_ITLB_MISS
);
138 static struct attribute
*power10_events_attr_dd1
[] = {
139 GENERIC_EVENT_PTR(PM_RUN_CYC
),
140 GENERIC_EVENT_PTR(PM_RUN_INST_CMPL
),
141 GENERIC_EVENT_PTR(PM_BR_CMPL
),
142 GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL
),
143 GENERIC_EVENT_PTR(PM_LD_REF_L1
),
144 GENERIC_EVENT_PTR(PM_LD_MISS_L1
),
145 GENERIC_EVENT_PTR(MEM_LOADS
),
146 GENERIC_EVENT_PTR(MEM_STORES
),
147 CACHE_EVENT_PTR(PM_LD_MISS_L1
),
148 CACHE_EVENT_PTR(PM_LD_REF_L1
),
149 CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS
),
150 CACHE_EVENT_PTR(PM_ST_MISS_L1
),
151 CACHE_EVENT_PTR(PM_L1_ICACHE_MISS
),
152 CACHE_EVENT_PTR(PM_INST_FROM_L1
),
153 CACHE_EVENT_PTR(PM_IC_PREF_REQ
),
154 CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS
),
155 CACHE_EVENT_PTR(PM_DATA_FROM_L3
),
156 CACHE_EVENT_PTR(PM_BR_MPRED_CMPL
),
157 CACHE_EVENT_PTR(PM_BR_CMPL
),
158 CACHE_EVENT_PTR(PM_DTLB_MISS
),
159 CACHE_EVENT_PTR(PM_ITLB_MISS
),
163 static struct attribute
*power10_events_attr
[] = {
164 GENERIC_EVENT_PTR(PM_RUN_CYC
),
165 GENERIC_EVENT_PTR(PM_RUN_INST_CMPL
),
166 GENERIC_EVENT_PTR(PM_BR_FIN
),
167 GENERIC_EVENT_PTR(PM_MPRED_BR_FIN
),
168 GENERIC_EVENT_PTR(PM_LD_REF_L1
),
169 GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN
),
170 GENERIC_EVENT_PTR(MEM_LOADS
),
171 GENERIC_EVENT_PTR(MEM_STORES
),
172 CACHE_EVENT_PTR(PM_LD_MISS_L1
),
173 CACHE_EVENT_PTR(PM_LD_REF_L1
),
174 CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS
),
175 CACHE_EVENT_PTR(PM_ST_MISS_L1
),
176 CACHE_EVENT_PTR(PM_L1_ICACHE_MISS
),
177 CACHE_EVENT_PTR(PM_INST_FROM_L1
),
178 CACHE_EVENT_PTR(PM_IC_PREF_REQ
),
179 CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS
),
180 CACHE_EVENT_PTR(PM_DATA_FROM_L3
),
181 CACHE_EVENT_PTR(PM_L3_PF_MISS_L3
),
182 CACHE_EVENT_PTR(PM_L2_ST_MISS
),
183 CACHE_EVENT_PTR(PM_L2_ST
),
184 CACHE_EVENT_PTR(PM_BR_MPRED_CMPL
),
185 CACHE_EVENT_PTR(PM_BR_CMPL
),
186 CACHE_EVENT_PTR(PM_DTLB_MISS
),
187 CACHE_EVENT_PTR(PM_ITLB_MISS
),
/*
 * Attribute group wrapping the DD1 event list (power10_events_attr_dd1).
 *
 * NOTE(review): this listing is damaged - the line between the opening
 * brace and .attrs (presumably the group's name member) and the closing
 * brace are missing.  Recover them from the upstream file before building.
 */
191 static struct attribute_group power10_pmu_events_group_dd1
= {
193 .attrs
= power10_events_attr_dd1
,
/*
 * Attribute group wrapping the default event list (power10_events_attr).
 *
 * NOTE(review): this listing is damaged - the line between the opening
 * brace and .attrs (presumably the group's name member) and the closing
 * brace are missing.  Recover them from the upstream file before building.
 */
196 static struct attribute_group power10_pmu_events_group
= {
198 .attrs
= power10_events_attr
,
201 PMU_FORMAT_ATTR(event
, "config:0-59");
202 PMU_FORMAT_ATTR(pmcxsel
, "config:0-7");
203 PMU_FORMAT_ATTR(mark
, "config:8");
204 PMU_FORMAT_ATTR(combine
, "config:10-11");
205 PMU_FORMAT_ATTR(unit
, "config:12-15");
206 PMU_FORMAT_ATTR(pmc
, "config:16-19");
207 PMU_FORMAT_ATTR(cache_sel
, "config:20-21");
208 PMU_FORMAT_ATTR(sdar_mode
, "config:22-23");
209 PMU_FORMAT_ATTR(sample_mode
, "config:24-28");
210 PMU_FORMAT_ATTR(thresh_sel
, "config:29-31");
211 PMU_FORMAT_ATTR(thresh_stop
, "config:32-35");
212 PMU_FORMAT_ATTR(thresh_start
, "config:36-39");
213 PMU_FORMAT_ATTR(l2l3_sel
, "config:40-44");
214 PMU_FORMAT_ATTR(src_sel
, "config:45-46");
215 PMU_FORMAT_ATTR(invert_bit
, "config:47");
216 PMU_FORMAT_ATTR(src_mask
, "config:48-53");
217 PMU_FORMAT_ATTR(src_match
, "config:54-59");
218 PMU_FORMAT_ATTR(radix_scope
, "config:9");
220 static struct attribute
*power10_pmu_format_attr
[] = {
221 &format_attr_event
.attr
,
222 &format_attr_pmcxsel
.attr
,
223 &format_attr_mark
.attr
,
224 &format_attr_combine
.attr
,
225 &format_attr_unit
.attr
,
226 &format_attr_pmc
.attr
,
227 &format_attr_cache_sel
.attr
,
228 &format_attr_sdar_mode
.attr
,
229 &format_attr_sample_mode
.attr
,
230 &format_attr_thresh_sel
.attr
,
231 &format_attr_thresh_stop
.attr
,
232 &format_attr_thresh_start
.attr
,
233 &format_attr_l2l3_sel
.attr
,
234 &format_attr_src_sel
.attr
,
235 &format_attr_invert_bit
.attr
,
236 &format_attr_src_mask
.attr
,
237 &format_attr_src_match
.attr
,
238 &format_attr_radix_scope
.attr
,
/*
 * Attribute group wrapping the format field list (power10_pmu_format_attr).
 *
 * NOTE(review): this listing is damaged - the line between the opening
 * brace and .attrs (presumably the group's name member) and the closing
 * brace are missing.  Recover them from the upstream file before building.
 */
242 static struct attribute_group power10_pmu_format_group
= {
244 .attrs
= power10_pmu_format_attr
,
247 static const struct attribute_group
*power10_pmu_attr_groups_dd1
[] = {
248 &power10_pmu_format_group
,
249 &power10_pmu_events_group_dd1
,
253 static const struct attribute_group
*power10_pmu_attr_groups
[] = {
254 &power10_pmu_format_group
,
255 &power10_pmu_events_group
,
259 static int power10_generic_events_dd1
[] = {
260 [PERF_COUNT_HW_CPU_CYCLES
] = PM_RUN_CYC
,
261 [PERF_COUNT_HW_INSTRUCTIONS
] = PM_RUN_INST_CMPL
,
262 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS
] = PM_BR_CMPL
,
263 [PERF_COUNT_HW_BRANCH_MISSES
] = PM_BR_MPRED_CMPL
,
264 [PERF_COUNT_HW_CACHE_REFERENCES
] = PM_LD_REF_L1
,
265 [PERF_COUNT_HW_CACHE_MISSES
] = PM_LD_MISS_L1
,
268 static int power10_generic_events
[] = {
269 [PERF_COUNT_HW_CPU_CYCLES
] = PM_RUN_CYC
,
270 [PERF_COUNT_HW_INSTRUCTIONS
] = PM_RUN_INST_CMPL
,
271 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS
] = PM_BR_FIN
,
272 [PERF_COUNT_HW_BRANCH_MISSES
] = PM_MPRED_BR_FIN
,
273 [PERF_COUNT_HW_CACHE_REFERENCES
] = PM_LD_REF_L1
,
274 [PERF_COUNT_HW_CACHE_MISSES
] = PM_LD_DEMAND_MISS_L1_FIN
,
277 static u64
power10_bhrb_filter_map(u64 branch_sample_type
)
279 u64 pmu_bhrb_filter
= 0;
281 /* BHRB and regular PMU events share the same privilege state
282 * filter configuration. BHRB is always recorded along with a
283 * regular PMU event. As the privilege state filter is handled
284 * in the basic PMC configuration of the accompanying regular
285 * PMU event, we ignore any separate BHRB specific request.
288 /* No branch filter requested */
289 if (branch_sample_type
& PERF_SAMPLE_BRANCH_ANY
)
290 return pmu_bhrb_filter
;
292 /* Invalid branch filter options - HW does not support */
293 if (branch_sample_type
& PERF_SAMPLE_BRANCH_ANY_RETURN
)
296 if (branch_sample_type
& PERF_SAMPLE_BRANCH_IND_CALL
) {
297 pmu_bhrb_filter
|= POWER10_MMCRA_IFM2
;
298 return pmu_bhrb_filter
;
301 if (branch_sample_type
& PERF_SAMPLE_BRANCH_COND
) {
302 pmu_bhrb_filter
|= POWER10_MMCRA_IFM3
;
303 return pmu_bhrb_filter
;
306 if (branch_sample_type
& PERF_SAMPLE_BRANCH_CALL
)
309 if (branch_sample_type
& PERF_SAMPLE_BRANCH_ANY_CALL
) {
310 pmu_bhrb_filter
|= POWER10_MMCRA_IFM1
;
311 return pmu_bhrb_filter
;
314 /* Every thing else is unsupported */
318 static void power10_config_bhrb(u64 pmu_bhrb_filter
)
320 pmu_bhrb_filter
&= POWER10_MMCRA_BHRB_MASK
;
322 /* Enable BHRB filter in PMU */
323 mtspr(SPRN_MMCRA
, (mfspr(SPRN_MMCRA
) | pmu_bhrb_filter
));
326 #define C(x) PERF_COUNT_HW_CACHE_##x
329 * Table of generalized cache-related events.
330 * 0 means not supported, -1 means nonsensical, other values
333 static u64 power10_cache_events_dd1
[C(MAX
)][C(OP_MAX
)][C(RESULT_MAX
)] = {
336 [C(RESULT_ACCESS
)] = PM_LD_REF_L1
,
337 [C(RESULT_MISS
)] = PM_LD_MISS_L1
,
340 [C(RESULT_ACCESS
)] = 0,
341 [C(RESULT_MISS
)] = PM_ST_MISS_L1
,
344 [C(RESULT_ACCESS
)] = PM_LD_PREFETCH_CACHE_LINE_MISS
,
345 [C(RESULT_MISS
)] = 0,
350 [C(RESULT_ACCESS
)] = PM_INST_FROM_L1
,
351 [C(RESULT_MISS
)] = PM_L1_ICACHE_MISS
,
354 [C(RESULT_ACCESS
)] = PM_INST_FROM_L1MISS
,
355 [C(RESULT_MISS
)] = -1,
358 [C(RESULT_ACCESS
)] = PM_IC_PREF_REQ
,
359 [C(RESULT_MISS
)] = 0,
364 [C(RESULT_ACCESS
)] = PM_DATA_FROM_L3
,
365 [C(RESULT_MISS
)] = PM_DATA_FROM_L3MISS
,
368 [C(RESULT_ACCESS
)] = -1,
369 [C(RESULT_MISS
)] = -1,
372 [C(RESULT_ACCESS
)] = -1,
373 [C(RESULT_MISS
)] = 0,
378 [C(RESULT_ACCESS
)] = 0,
379 [C(RESULT_MISS
)] = PM_DTLB_MISS
,
382 [C(RESULT_ACCESS
)] = -1,
383 [C(RESULT_MISS
)] = -1,
386 [C(RESULT_ACCESS
)] = -1,
387 [C(RESULT_MISS
)] = -1,
392 [C(RESULT_ACCESS
)] = 0,
393 [C(RESULT_MISS
)] = PM_ITLB_MISS
,
396 [C(RESULT_ACCESS
)] = -1,
397 [C(RESULT_MISS
)] = -1,
400 [C(RESULT_ACCESS
)] = -1,
401 [C(RESULT_MISS
)] = -1,
406 [C(RESULT_ACCESS
)] = PM_BR_CMPL
,
407 [C(RESULT_MISS
)] = PM_BR_MPRED_CMPL
,
410 [C(RESULT_ACCESS
)] = -1,
411 [C(RESULT_MISS
)] = -1,
414 [C(RESULT_ACCESS
)] = -1,
415 [C(RESULT_MISS
)] = -1,
420 [C(RESULT_ACCESS
)] = -1,
421 [C(RESULT_MISS
)] = -1,
424 [C(RESULT_ACCESS
)] = -1,
425 [C(RESULT_MISS
)] = -1,
428 [C(RESULT_ACCESS
)] = -1,
429 [C(RESULT_MISS
)] = -1,
434 static u64 power10_cache_events
[C(MAX
)][C(OP_MAX
)][C(RESULT_MAX
)] = {
437 [C(RESULT_ACCESS
)] = PM_LD_REF_L1
,
438 [C(RESULT_MISS
)] = PM_LD_MISS_L1
,
441 [C(RESULT_ACCESS
)] = 0,
442 [C(RESULT_MISS
)] = PM_ST_MISS_L1
,
445 [C(RESULT_ACCESS
)] = PM_LD_PREFETCH_CACHE_LINE_MISS
,
446 [C(RESULT_MISS
)] = 0,
451 [C(RESULT_ACCESS
)] = PM_INST_FROM_L1
,
452 [C(RESULT_MISS
)] = PM_L1_ICACHE_MISS
,
455 [C(RESULT_ACCESS
)] = PM_INST_FROM_L1MISS
,
456 [C(RESULT_MISS
)] = -1,
459 [C(RESULT_ACCESS
)] = PM_IC_PREF_REQ
,
460 [C(RESULT_MISS
)] = 0,
465 [C(RESULT_ACCESS
)] = PM_DATA_FROM_L3
,
466 [C(RESULT_MISS
)] = PM_DATA_FROM_L3MISS
,
469 [C(RESULT_ACCESS
)] = PM_L2_ST
,
470 [C(RESULT_MISS
)] = PM_L2_ST_MISS
,
473 [C(RESULT_ACCESS
)] = PM_L3_PF_MISS_L3
,
474 [C(RESULT_MISS
)] = 0,
479 [C(RESULT_ACCESS
)] = 0,
480 [C(RESULT_MISS
)] = PM_DTLB_MISS
,
483 [C(RESULT_ACCESS
)] = -1,
484 [C(RESULT_MISS
)] = -1,
487 [C(RESULT_ACCESS
)] = -1,
488 [C(RESULT_MISS
)] = -1,
493 [C(RESULT_ACCESS
)] = 0,
494 [C(RESULT_MISS
)] = PM_ITLB_MISS
,
497 [C(RESULT_ACCESS
)] = -1,
498 [C(RESULT_MISS
)] = -1,
501 [C(RESULT_ACCESS
)] = -1,
502 [C(RESULT_MISS
)] = -1,
507 [C(RESULT_ACCESS
)] = PM_BR_CMPL
,
508 [C(RESULT_MISS
)] = PM_BR_MPRED_CMPL
,
511 [C(RESULT_ACCESS
)] = -1,
512 [C(RESULT_MISS
)] = -1,
515 [C(RESULT_ACCESS
)] = -1,
516 [C(RESULT_MISS
)] = -1,
521 [C(RESULT_ACCESS
)] = -1,
522 [C(RESULT_MISS
)] = -1,
525 [C(RESULT_ACCESS
)] = -1,
526 [C(RESULT_MISS
)] = -1,
529 [C(RESULT_ACCESS
)] = -1,
530 [C(RESULT_MISS
)] = -1,
/*
 * POWER10 PMU description handed to register_power_pmu().
 *
 * Constraint handling, MMCR computation, memory data-source/weight decoding
 * and PMC disabling reuse the shared isa207_* helpers; BHRB configuration,
 * BHRB filter mapping and event alternatives use the power10_* functions
 * from this file.  The event tables set here are the default (non-DD1)
 * ones; init_power10_pmu() replaces generic_events, attr_groups and
 * cache_events and adds a flag when it detects DD1.
 *
 * NOTE(review): this listing is damaged.  The original lines 538, 553, 558
 * and the closing brace are missing, and the .flags expression ends in a
 * dangling '|', so at least one flag and other members are lost.  Recover
 * them from the upstream file before building.
 */
537 static struct power_pmu power10_pmu
= {
539 .n_counter
= MAX_PMU_COUNTERS
,
540 .add_fields
= ISA207_ADD_FIELDS
,
541 .test_adder
= ISA207_TEST_ADDER
,
542 .group_constraint_mask
= CNST_CACHE_PMC4_MASK
,
543 .group_constraint_val
= CNST_CACHE_PMC4_VAL
,
544 .compute_mmcr
= isa207_compute_mmcr
,
545 .config_bhrb
= power10_config_bhrb
,
546 .bhrb_filter_map
= power10_bhrb_filter_map
,
547 .get_constraint
= isa207_get_constraint
,
548 .get_alternatives
= power10_get_alternatives
,
549 .get_mem_data_src
= isa207_get_mem_data_src
,
550 .get_mem_weight
= isa207_get_mem_weight
,
551 .disable_pmc
= isa207_disable_pmc
,
552 .flags
= PPMU_HAS_SIER
| PPMU_ARCH_207S
|
554 .n_generic
= ARRAY_SIZE(power10_generic_events
),
555 .generic_events
= power10_generic_events
,
556 .cache_events
= &power10_cache_events
,
557 .attr_groups
= power10_pmu_attr_groups
,
559 .capabilities
= PERF_PMU_CAP_EXTENDED_REGS
,
/*
 * Register the POWER10 PMU with the powerpc perf core.
 *
 * Visible steps, in order:
 *  - test whether cur_cpu_spec->oprofile_cpu_type is unset or differs from
 *    "ppc64/power10";
 *  - read the PVR; when PVR_CFG(pvr) == 1 (DD1 per the comment below), OR
 *    PPMU_P10_DD1 into power10_pmu.flags;
 *  - set PERF_REG_EXTENDED_MASK to PERF_REG_PMU_MASK_31;
 *  - for DD1, switch generic_events, attr_groups and cache_events to their
 *    *_dd1 variants;
 *  - call register_power_pmu(&power10_pmu);
 *  - set PPC_FEATURE2_EBB in cur_cpu_spec->cpu_user_features2.
 *
 * NOTE(review): this listing is damaged.  The braces, the declarations of
 * "pvr" and "rc", the statement controlled by the CPU-type check, any
 * handling of "rc" and the final return are missing - recover them from the
 * upstream file before building.
 * NOTE(review): the DD1 test is evaluated twice; the two branches could
 * share a single check.
 */
562 int init_power10_pmu(void)
567 /* Comes from cpu_specs[] */
568 if (!cur_cpu_spec
->oprofile_cpu_type
||
569 strcmp(cur_cpu_spec
->oprofile_cpu_type
, "ppc64/power10"))
572 pvr
= mfspr(SPRN_PVR
);
573 /* Add the ppmu flag for power10 DD1 */
574 if ((PVR_CFG(pvr
) == 1))
575 power10_pmu
.flags
|= PPMU_P10_DD1
;
577 /* Set the PERF_REG_EXTENDED_MASK here */
578 PERF_REG_EXTENDED_MASK
= PERF_REG_PMU_MASK_31
;
580 if ((PVR_CFG(pvr
) == 1)) {
581 power10_pmu
.generic_events
= power10_generic_events_dd1
;
582 power10_pmu
.attr_groups
= power10_pmu_attr_groups_dd1
;
583 power10_pmu
.cache_events
= &power10_cache_events_dd1
;
586 rc
= register_power_pmu(&power10_pmu
);
590 /* Tell userspace that EBB is supported */
591 cur_cpu_spec
->cpu_user_features2
|= PPC_FEATURE2_EBB
;