/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_pm.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
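/*
 * Usage note (illustrative): the counters defined in this file are exposed
 * through perf as an "i915" PMU, so once registered they can typically be
 * read from userspace with something like:
 *
 *   perf stat -e i915/rc6-residency/,i915/actual-frequency/ -a sleep 1
 *
 * Which events exist on a given platform is decided by config_status() and
 * engine_event_status() below.
 */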
static cpumask_t i915_pmu_cpumask;
static unsigned int i915_pmu_target_cpu = -1;
static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}
static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}
static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}
static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}
static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}
static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}
static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}
static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}
static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}
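/*
 * Worked example of the mapping implemented above: engine configs map
 * directly onto their sample type bit (I915_SAMPLE_BUSY/WAIT/SEMA), while
 * "other" events are placed after the engine sample bits, e.g.
 * I915_PMU_RC6_RESIDENCY (== __I915_PMU_OTHER(3)) lands on bit
 * ENGINE_SAMPLE_BITS + 3 of the enable bitmask.
 */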
static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	u32 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = pmu->enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 */
	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}
static u64 __get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	u64 val;

	val = intel_rc6_residency_ns(&gt->rc6,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);

	return val;
}
#if IS_ENABLED(CONFIG_PM)

static inline s64 ktime_since(const ktime_t kt)
{
	return ktime_to_ns(ktime_sub(ktime_get(), kt));
}

static u64 get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;
	bool awake = false;
	u64 val;

	if (intel_gt_pm_get_if_awake(gt)) {
		val = __get_rc6(gt);
		intel_gt_pm_put_async(gt);
		awake = true;
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (awake) {
		pmu->sample[__I915_SAMPLE_RC6].cur = val;
	} else {
		/*
		 * We think we are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		val = ktime_since(pmu->sleep_last);
		val += pmu->sample[__I915_SAMPLE_RC6].cur;
	}

	if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
		val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
	else
		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}
static void park_rc6(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);

	pmu->sleep_last = ktime_get();
}
#else

static u64 get_rc6(struct intel_gt *gt)
{
	return __get_rc6(gt);
}

static void park_rc6(struct drm_i915_private *i915) {}

#endif
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
	if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
		pmu->timer_enabled = true;
		pmu->timer_last = ktime_get();
		hrtimer_start_range_ns(&pmu->timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	park_rc6(i915);

	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	pmu->timer_enabled = pmu_needs_timer(pmu, false);

	spin_unlock_irq(&pmu->lock);
}
void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	spin_lock_irq(&pmu->lock);

	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	spin_unlock_irq(&pmu->lock);
}
static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}
static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
	/*
	 * We have to avoid concurrent mmio cache line access on gen7 or
	 * risk a machine hang. For a fun history lesson dig out the old
	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
	 */
	return IS_GEN(i915, 7);
}
static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
	struct intel_engine_pmu *pmu = &engine->pmu;
	bool busy;
	u32 val;

	val = ENGINE_READ_FW(engine, RING_CTL);
	if (val == 0) /* powerwell off => engine idle */
		return;

	if (val & RING_WAIT)
		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
	if (val & RING_WAIT_SEMAPHORE)
		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

	/* No need to sample when busy stats are supported. */
	if (intel_engine_supports_stats(engine))
		return;

	/*
	 * While waiting on a semaphore or event, MI_MODE reports the
	 * ring as idle. However, previously using the seqno, and with
	 * execlists sampling, we account for the ring waiting as the
	 * engine being busy. Therefore, we record the sample as being
	 * busy if either waiting or !idle.
	 */
	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
	if (!busy) {
		val = ENGINE_READ_FW(engine, RING_MI_MODE);
		busy = !(val & MODE_IDLE);
	}
	if (busy)
		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;

	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!intel_gt_pm_is_awake(gt))
		return;

	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_get_if_awake(engine))
			continue;

		if (exclusive_mmio_access(i915)) {
			spin_lock_irqsave(&engine->uncore->lock, flags);
			engine_sample(engine, period_ns);
			spin_unlock_irqrestore(&engine->uncore->lock, flags);
		} else {
			engine_sample(engine, period_ns);
		}

		intel_engine_pm_put_async(engine);
	}
}
static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}
static bool frequency_sampling_enabled(struct i915_pmu *pmu)
{
	return pmu->enable &
	       (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
}
static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_rps *rps = &gt->rps;

	if (!frequency_sampling_enabled(pmu))
		return;

	/* Report 0/0 (actual/requested) frequency while parked. */
	if (!intel_gt_pm_get_if_awake(gt))
		return;

	if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		/*
		 * We take a quick peek here without using forcewake
		 * so that we don't perturb the system under observation
		 * (forcewake => !rc6 => increased power use). We expect
		 * that if the read fails because it is outside of the
		 * mmio power well, then it will return 0 -- in which
		 * case we assume the system is running at the intended
		 * frequency. Fortunately, the read should rarely fail!
		 */
		val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
		if (val)
			val = intel_rps_get_cagf(rps, val);
		else
			val = rps->cur_freq;

		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(rps, val), period_ns / 1000);
	}

	if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
				intel_gpu_freq(rps, rps->cur_freq),
				period_ns / 1000);
	}

	intel_gt_pm_put_async(gt);
}
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_gt *gt = &i915->gt;
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(pmu->timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
	pmu->timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer
	 * callback delay greatly dominates this so we keep it simple.
	 */
	engines_sample(gt, period_ns);
	frequency_sample(gt, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}
static void i915_pmu_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);

	drm_WARN_ON(&i915->drm, event->parent);

	drm_dev_put(&i915->drm);
}
static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		fallthrough;
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	return engine_event_status(engine, engine_event_sample(event));
}
static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	int ret;

	if (pmu->closed)
		return -ENODEV;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent) {
		drm_dev_get(&i915->drm);
		event->destroy = i915_pmu_event_destroy;
	}

	return 0;
}
static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			ktime_t unused;

			val = ktime_to_ns(intel_engine_get_busy_time(engine,
								     &unused));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = READ_ONCE(pmu->irq_count);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(&i915->gt);
			break;
		}
	}

	return val;
}
static void i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct hw_perf_event *hwc = &event->hw;
	struct i915_pmu *pmu = &i915->pmu;
	u64 prev, new;

	if (pmu->closed) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}
again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}
static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	intel_wakeref_t wakeref;
	unsigned long flags;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	spin_lock_irqsave(&pmu->lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == ~0);

	if (pmu->enable_count[bit] == 0 &&
	    config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) {
		pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0;
		pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
		pmu->sleep_last = ktime_get();
	}

	pmu->enable |= BIT_ULL(bit);
	pmu->enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}
static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	struct i915_pmu *pmu = &i915->pmu;
	unsigned long flags;

	spin_lock_irqsave(&pmu->lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--pmu->enable_count[bit] == 0) {
		pmu->enable &= ~BIT_ULL(bit);
		pmu->timer_enabled &= pmu_needs_timer(pmu, true);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		return;

	i915_pmu_enable(event);
	event->hw.state = 0;
}
static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}
static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct i915_pmu *pmu = &i915->pmu;

	if (pmu->closed)
		return -ENODEV;

	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}
static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}
static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}
struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};
static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}
#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)
static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};
static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};
struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};
static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);
static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};
#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}
*
847 add_i915_attr(struct i915_ext_attribute
*attr
, const char *name
, u64 config
)
849 sysfs_attr_init(&attr
->attr
.attr
);
850 attr
->attr
.attr
.name
= name
;
851 attr
->attr
.attr
.mode
= 0444;
852 attr
->attr
.show
= i915_pmu_event_show
;
static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}
static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->uabi_instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	pmu->i915_attr = i915_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}
static void free_event_attributes(struct i915_pmu *pmu)
{
	struct attribute **attr_iter = pmu->events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(pmu->events_attr_group.attrs);
	kfree(pmu->i915_attr);
	kfree(pmu->pmu_attr);

	pmu->events_attr_group.attrs = NULL;
	pmu->i915_attr = NULL;
	pmu->pmu_attr = NULL;
}
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
	unsigned int target = i915_pmu_target_cpu;

	GEM_BUG_ON(!pmu->base.event_init);

	/*
	 * Unregistering an instance generates a CPU offline event which we must
	 * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask.
	 */
	if (pmu->closed)
		return 0;

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			i915_pmu_target_cpu = target;
		}
	}

	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
		perf_pmu_migrate_context(&pmu->base, cpu, target);
		pmu->cpuhp.cpu = target;
	}

	return 0;
}
static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

void i915_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		pr_notice("Failed to setup cpuhp state for i915 PMU! (%d)\n",
			  ret);
	else
		cpuhp_slot = ret;
}

void i915_pmu_exit(void)
{
	if (cpuhp_slot != CPUHP_INVALID)
		cpuhp_remove_multi_state(cpuhp_slot);
}
static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
	if (cpuhp_slot == CPUHP_INVALID)
		return -EINVAL;

	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}
static bool is_igp(struct drm_i915_private *i915)
{
	struct pci_dev *pdev = i915->drm.pdev;

	/* IGP is 0000:00:02.0 */
	return pci_domain_nr(pdev->bus) == 0 &&
	       pdev->bus->number == 0 &&
	       PCI_SLOT(pdev->devfn) == 2 &&
	       PCI_FUNC(pdev->devfn) == 0;
}
void i915_pmu_register(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;
	const struct attribute_group *attr_groups[] = {
		&i915_pmu_format_attr_group,
		&pmu->events_attr_group,
		&i915_pmu_cpumask_attr_group,
		NULL
	};

	int ret = -ENOMEM;

	if (INTEL_GEN(i915) <= 2) {
		drm_info(&i915->drm, "PMU not supported for this GPU.");
		return;
	}

	spin_lock_init(&pmu->lock);
	hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	pmu->timer.function = i915_sample;
	pmu->cpuhp.cpu = -1;

	if (!is_igp(i915)) {
		pmu->name = kasprintf(GFP_KERNEL,
				      "i915_%s",
				      dev_name(i915->drm.dev));
		if (pmu->name) {
			/* tools/perf reserves colons as special. */
			strreplace((char *)pmu->name, ':', '_');
		}
	} else {
		pmu->name = "i915";
	}
	if (!pmu->name)
		goto err;

	pmu->events_attr_group.name = "events";
	pmu->events_attr_group.attrs = create_event_attributes(pmu);
	if (!pmu->events_attr_group.attrs)
		goto err_name;

	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
					GFP_KERNEL);
	if (!pmu->base.attr_groups)
		goto err_attr;

	pmu->base.module	= THIS_MODULE;
	pmu->base.task_ctx_nr	= perf_invalid_context;
	pmu->base.event_init	= i915_pmu_event_init;
	pmu->base.add		= i915_pmu_event_add;
	pmu->base.del		= i915_pmu_event_del;
	pmu->base.start		= i915_pmu_event_start;
	pmu->base.stop		= i915_pmu_event_stop;
	pmu->base.read		= i915_pmu_event_read;
	pmu->base.event_idx	= i915_pmu_event_event_idx;

	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
	if (ret)
		goto err_groups;

	ret = i915_pmu_register_cpuhp_state(pmu);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&pmu->base);
err_groups:
	kfree(pmu->base.attr_groups);
err_attr:
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
err_name:
	if (!is_igp(i915))
		kfree(pmu->name);
err:
	drm_notice(&i915->drm, "Failed to register PMU!\n");
}
void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->base.event_init)
		return;

	/*
	 * "Disconnect" the PMU callbacks - since all are atomic, synchronize_rcu
	 * ensures all currently executing ones will have exited before we
	 * proceed with unregistration.
	 */
	pmu->closed = true;
	synchronize_rcu();

	hrtimer_cancel(&pmu->timer);

	i915_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	kfree(pmu->base.attr_groups);
	if (!is_igp(i915))
		kfree(pmu->name);
	free_event_attributes(pmu);
}