// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */

#include <linux/sched/task.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include "idxd.h"
#include "perfmon.h"
/*
 * These attributes specify the bits in the config word that the perf
 * syscall uses to pass the event ids and categories to perfmon.
 */
DEFINE_PERFMON_FORMAT_ATTR(event_category, "config:0-3");
DEFINE_PERFMON_FORMAT_ATTR(event, "config:4-31");
/*
 * These attributes specify the bits in the config1 word that the perf
 * syscall uses to pass filter data to perfmon.
 */
DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31");
DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39");
DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43");
DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51");
DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59");
#define PERFMON_FILTERS_START	2
#define PERFMON_FILTERS_MAX	5
static struct attribute *perfmon_format_attrs[] = {
	&format_attr_idxd_event_category.attr,
	&format_attr_idxd_event.attr,
	&format_attr_idxd_filter_wq.attr,
	&format_attr_idxd_filter_tc.attr,
	&format_attr_idxd_filter_pgsz.attr,
	&format_attr_idxd_filter_sz.attr,
	&format_attr_idxd_filter_eng.attr,
	NULL,
};
static struct attribute_group perfmon_format_attr_group = {
	.name = "format",
	.attrs = perfmon_format_attrs,
};
static const struct attribute_group *perfmon_attr_groups[] = {
	&perfmon_format_attr_group,
	NULL,
};
static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event)
{
	return &idxd_pmu->pmu == event->pmu;
}
static int perfmon_collect_events(struct idxd_pmu *idxd_pmu,
				  struct perf_event *leader,
				  bool do_grp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = idxd_pmu->n_counters;
	n = idxd_pmu->n_events;

	if (n >= max_count)
		return -EINVAL;

	if (is_idxd_event(idxd_pmu, leader)) {
		idxd_pmu->event_list[n] = leader;
		idxd_pmu->event_list[n]->hw.idx = n;
		n++;
	}

	if (!do_grp)
		return n;

	for_each_sibling_event(event, leader) {
		if (!is_idxd_event(idxd_pmu, event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		idxd_pmu->event_list[n] = event;
		idxd_pmu->event_list[n]->hw.idx = n;
		n++;
	}

	return n;
}
static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu,
				    struct perf_event *event, int idx)
{
	struct idxd_device *idxd = idxd_pmu->idxd;
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx));
	hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx));
}
static int perfmon_assign_event(struct idxd_pmu *idxd_pmu,
				struct perf_event *event)
{
	int i;

	for (i = 0; i < IDXD_PMU_EVENT_MAX; i++)
		if (!test_and_set_bit(i, idxd_pmu->used_mask))
			return i;

	return -EINVAL;
}
/*
 * Check whether there are enough counters to satisfy that all the
 * events in the group can actually be scheduled at the same time.
 *
 * To do this, create a fake idxd_pmu object so the event collection
 * and assignment functions can be used without affecting the internal
 * state of the real idxd_pmu object.
 */
static int perfmon_validate_group(struct idxd_pmu *pmu,
				  struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct idxd_pmu *fake_pmu;
	int i, ret = 0, n, idx;

	fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL);
	if (!fake_pmu)
		return -ENOMEM;

	fake_pmu->pmu.name = pmu->pmu.name;
	fake_pmu->n_counters = pmu->n_counters;

	n = perfmon_collect_events(fake_pmu, leader, true);
	if (n < 0) {
		ret = -EINVAL;
		goto out;
	}

	fake_pmu->n_events = n;
	n = perfmon_collect_events(fake_pmu, event, false);
	if (n < 0) {
		ret = -EINVAL;
		goto out;
	}

	fake_pmu->n_events = n;

	for (i = 0; i < n; i++) {
		event = fake_pmu->event_list[i];

		idx = perfmon_assign_event(fake_pmu, event);
		if (idx < 0) {
			ret = -EINVAL;
			goto out;
		}
	}
out:
	kfree(fake_pmu);

	return ret;
}
static int perfmon_pmu_event_init(struct perf_event *event)
{
	struct idxd_device *idxd;
	int ret = 0;

	idxd = event_to_idxd(event);
	event->hw.idx = -1;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* sampling not supported */
	if (event->attr.sample_period)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	if (event->pmu != &idxd->idxd_pmu->pmu)
		return -EINVAL;

	event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd));
	event->hw.config = event->attr.config;

	if (event->group_leader != event)
		 /* non-group events have themselves as leader */
		ret = perfmon_validate_group(idxd->idxd_pmu, event);

	return ret;
}
static inline u64 perfmon_pmu_read_counter(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct idxd_device *idxd;
	int cntr = hwc->idx;

	idxd = event_to_idxd(event);

	return ioread64(CNTRDATA_REG(idxd, cntr));
}
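
/*
 * Note on the delta computation below: both raw values are shifted up by
 * (64 - counter_width) bits so that a hardware counter wrap is absorbed by
 * the unsigned subtraction. Worked example, assuming a hypothetical
 * counter_width of 32 (shift = 32): prev = 0xffffffff and new = 0x3 after
 * a wrap give delta = ((0x3 << 32) - (0xffffffff << 32)) >> 32 = 4.
 */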
static void perfmon_pmu_event_update(struct perf_event *event)
{
	struct idxd_device *idxd = event_to_idxd(event);
	u64 prev_raw_count, new_raw_count, delta, p, n;
	int shift = 64 - idxd->idxd_pmu->counter_width;
	struct hw_perf_event *hwc = &event->hw;

	prev_raw_count = local64_read(&hwc->prev_count);
	do {
		new_raw_count = perfmon_pmu_read_counter(event);
	} while (!local64_try_cmpxchg(&hwc->prev_count,
				      &prev_raw_count, new_raw_count));
	n = (new_raw_count << shift);
	p = (prev_raw_count << shift);

	delta = ((n - p) >> shift);

	local64_add(delta, &event->count);
}
void perfmon_counter_overflow(struct idxd_device *idxd)
{
	int i, n_counters, max_loop = OVERFLOW_SIZE;
	struct perf_event *event;
	unsigned long ovfstatus;

	n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE);

	ovfstatus = ioread32(OVFSTATUS_REG(idxd));

	/*
	 * While updating overflowed counters, other counters behind
	 * them could overflow and be missed in a given pass.
	 * Normally this could happen at most n_counters times, but in
	 * theory a tiny counter width could result in continual
	 * overflows and endless looping.  max_loop provides a
	 * failsafe in that highly unlikely case.
	 */
	while (ovfstatus && max_loop--) {
		/* Figure out which counter(s) overflowed */
		for_each_set_bit(i, &ovfstatus, n_counters) {
			unsigned long ovfstatus_clear = 0;

			/* Update event->count for overflowed counter */
			event = idxd->idxd_pmu->event_list[i];
			perfmon_pmu_event_update(event);
			/* Writing 1 to OVFSTATUS bit clears it */
			set_bit(i, &ovfstatus_clear);
			iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd));
		}

		ovfstatus = ioread32(OVFSTATUS_REG(idxd));
	}

	/*
	 * Should never happen.  If so, it means a counter(s) looped
	 * around twice while this handler was running.
	 */
	WARN_ON_ONCE(ovfstatus);
}
static inline void perfmon_reset_config(struct idxd_device *idxd)
{
	iowrite32(CONFIG_RESET, PERFRST_REG(idxd));
	iowrite32(0, OVFSTATUS_REG(idxd));
	iowrite32(0, PERFFRZ_REG(idxd));
}
static inline void perfmon_reset_counters(struct idxd_device *idxd)
{
	iowrite32(CNTR_RESET, PERFRST_REG(idxd));
}
static inline void perfmon_reset(struct idxd_device *idxd)
{
	perfmon_reset_config(idxd);
	perfmon_reset_counters(idxd);
}
static void perfmon_pmu_event_start(struct perf_event *event, int mode)
{
	u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0;
	u64 cntr_cfg, cntrdata, event_enc, event_cat = 0;
	struct hw_perf_event *hwc = &event->hw;
	union filter_cfg flt_cfg;
	union event_cfg event_cfg;
	struct idxd_device *idxd;
	int cntr;

	idxd = event_to_idxd(event);

	event->hw.idx = hwc->idx;
	cntr = hwc->idx;

	/* Obtain event category and event value from user space */
	event_cfg.val = event->attr.config;
	flt_cfg.val = event->attr.config1;
	event_cat = event_cfg.event_cat;
	event_enc = event_cfg.event_enc;

	/* Obtain filter configuration from user space */
	flt_wq = flt_cfg.wq;
	flt_tc = flt_cfg.tc;
	flt_pg_sz = flt_cfg.pg_sz;
	flt_xfer_sz = flt_cfg.xfer_sz;
	flt_eng = flt_cfg.eng;

	if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ));
	if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC));
	if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ));
	if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ));
	if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters))
		iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG));

	/* Read the start value */
	cntrdata = ioread64(CNTRDATA_REG(idxd, cntr));
	local64_set(&event->hw.prev_count, cntrdata);

	/* Set counter to event/category */
	cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT;
	cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT;
	/* Set interrupt on overflow and counter enable bits */
	cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE);

	iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));
}
static void perfmon_pmu_event_stop(struct perf_event *event, int mode)
{
	struct hw_perf_event *hwc = &event->hw;
	struct idxd_device *idxd;
	int i, cntr = hwc->idx;
	u64 cntr_cfg;

	idxd = event_to_idxd(event);

	/* remove this event from event list */
	for (i = 0; i < idxd->idxd_pmu->n_events; i++) {
		if (event != idxd->idxd_pmu->event_list[i])
			continue;

		for (++i; i < idxd->idxd_pmu->n_events; i++)
			idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i];
		--idxd->idxd_pmu->n_events;
		break;
	}

	cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr));
	cntr_cfg &= ~CNTRCFG_ENABLE;
	iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));

	if (mode == PERF_EF_UPDATE)
		perfmon_pmu_event_update(event);

	event->hw.idx = -1;
	clear_bit(cntr, idxd->idxd_pmu->used_mask);
}
static void perfmon_pmu_event_del(struct perf_event *event, int mode)
{
	perfmon_pmu_event_stop(event, PERF_EF_UPDATE);
}
static int perfmon_pmu_event_add(struct perf_event *event, int flags)
{
	struct idxd_device *idxd = event_to_idxd(event);
	struct idxd_pmu *idxd_pmu = idxd->idxd_pmu;
	struct hw_perf_event *hwc = &event->hw;
	int idx, n;

	n = perfmon_collect_events(idxd_pmu, event, false);
	if (n < 0)
		return n;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	idx = perfmon_assign_event(idxd_pmu, event);
	if (idx < 0)
		return idx;

	perfmon_assign_hw_event(idxd_pmu, event, idx);

	if (flags & PERF_EF_START)
		perfmon_pmu_event_start(event, 0);

	idxd_pmu->n_events = n;

	return 0;
}
static void enable_perfmon_pmu(struct idxd_device *idxd)
{
	iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd));
}
static void disable_perfmon_pmu(struct idxd_device *idxd)
{
	iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd));
}
static void perfmon_pmu_enable(struct pmu *pmu)
{
	struct idxd_device *idxd = pmu_to_idxd(pmu);

	enable_perfmon_pmu(idxd);
}
static void perfmon_pmu_disable(struct pmu *pmu)
{
	struct idxd_device *idxd = pmu_to_idxd(pmu);

	disable_perfmon_pmu(idxd);
}
static void skip_filter(int i)
{
	int j;

	for (j = i; j < PERFMON_FILTERS_MAX; j++)
		perfmon_format_attrs[PERFMON_FILTERS_START + j] =
			perfmon_format_attrs[PERFMON_FILTERS_START + j + 1];
}
static void idxd_pmu_init(struct idxd_pmu *idxd_pmu)
{
	int i;

	for (i = 0 ; i < PERFMON_FILTERS_MAX; i++) {
		if (!test_bit(i, &idxd_pmu->supported_filters))
			skip_filter(i);
	}

	idxd_pmu->pmu.name = idxd_pmu->name;
	idxd_pmu->pmu.attr_groups = perfmon_attr_groups;
	idxd_pmu->pmu.task_ctx_nr = perf_invalid_context;
	idxd_pmu->pmu.event_init = perfmon_pmu_event_init;
	idxd_pmu->pmu.pmu_enable = perfmon_pmu_enable;
	idxd_pmu->pmu.pmu_disable = perfmon_pmu_disable;
	idxd_pmu->pmu.add = perfmon_pmu_event_add;
	idxd_pmu->pmu.del = perfmon_pmu_event_del;
	idxd_pmu->pmu.start = perfmon_pmu_event_start;
	idxd_pmu->pmu.stop = perfmon_pmu_event_stop;
	idxd_pmu->pmu.read = perfmon_pmu_event_update;
	idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
	idxd_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE;
	idxd_pmu->pmu.module = THIS_MODULE;
}
void perfmon_pmu_remove(struct idxd_device *idxd)
{
	if (!idxd->idxd_pmu)
		return;

	perf_pmu_unregister(&idxd->idxd_pmu->pmu);
	kfree(idxd->idxd_pmu);
	idxd->idxd_pmu = NULL;
}
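
/*
 * Once perf_pmu_register() below succeeds, the PMU and its format
 * attributes are expected to appear in sysfs, e.g. for a device that
 * registered as "dsa0":
 *
 *   /sys/bus/event_source/devices/dsa0/format/event
 *   /sys/bus/event_source/devices/dsa0/format/event_category
 *   /sys/bus/event_source/devices/dsa0/format/filter_wq
 */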
int perfmon_pmu_init(struct idxd_device *idxd)
{
	union idxd_perfcap perfcap;
	struct idxd_pmu *idxd_pmu;
	int rc = -ENODEV;

	/*
	 * If perfmon_offset or num_counters is 0, it means perfmon is
	 * not supported on this hardware.
	 */
	if (idxd->perfmon_offset == 0)
		return -ENODEV;

	idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL);
	if (!idxd_pmu)
		return -ENOMEM;

	idxd_pmu->idxd = idxd;
	idxd->idxd_pmu = idxd_pmu;

	if (idxd->data->type == IDXD_TYPE_DSA) {
		rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id);
		if (rc < 0)
			goto free;
	} else if (idxd->data->type == IDXD_TYPE_IAX) {
		rc = sprintf(idxd_pmu->name, "iax%d", idxd->id);
		if (rc < 0)
			goto free;
	} else {
		goto free;
	}

	perfmon_reset(idxd);

	perfcap.bits = ioread64(PERFCAP_REG(idxd));

	/*
	 * If total perf counter is 0, stop further registration.
	 * This is necessary in order to support driver running on
	 * guest which does not have pmon support.
	 */
	if (perfcap.num_perf_counter == 0)
		goto free;

	/* A counter width of 0 means it can't count */
	if (perfcap.counter_width == 0)
		goto free;

	/* Overflow interrupt and counter freeze support must be available */
	if (!perfcap.overflow_interrupt || !perfcap.counter_freeze)
		goto free;

	/* Number of event categories cannot be 0 */
	if (perfcap.num_event_category == 0)
		goto free;

	/*
	 * We don't support per-counter capabilities for now.
	 */
	if (perfcap.cap_per_counter)
		goto free;

	idxd_pmu->n_event_categories = perfcap.num_event_category;
	idxd_pmu->supported_event_categories = perfcap.global_event_category;
	idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter;

	/* check filter capability.  If 0, then filters are not supported */
	idxd_pmu->supported_filters = perfcap.filter;
	if (perfcap.filter)
		idxd_pmu->n_filters = hweight8(perfcap.filter);

	/* Store the total number of counters categories, and counter width */
	idxd_pmu->n_counters = perfcap.num_perf_counter;
	idxd_pmu->counter_width = perfcap.counter_width;

	idxd_pmu_init(idxd_pmu);

	rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1);
	if (rc)
		goto free;

	return 0;

free:
	kfree(idxd_pmu);
	idxd->idxd_pmu = NULL;

	return rc;
}