/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/irq.h>
#include <linux/pm_runtime.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"
#include "intel_pm.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
        (BIT(I915_SAMPLE_BUSY) | \
         BIT(I915_SAMPLE_WAIT) | \
         BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;
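/*
 * Userspace reads these counters through the core perf interface. As an
 * illustrative sketch only (not part of this driver), a counter such as RC6
 * residency could be opened roughly like so, assuming the dynamic PMU type
 * has been read from /sys/bus/event_source/devices/i915/type, a CPU has been
 * picked from the PMU "cpumask" attribute, and the config values come from
 * the uapi header <drm/i915_drm.h>:
 *
 *	struct perf_event_attr attr = {
 *		.type = i915_pmu_type,
 *		.size = sizeof(attr),
 *		.config = I915_PMU_RC6_RESIDENCY,
 *	};
 *	int fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
 *	__u64 value;
 *
 *	read(fd, &value, sizeof(value));
 */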
static u8 engine_config_sample(u64 config)
{
        return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
        return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
        return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
        return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
        return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
        if (is_engine_config(config))
                return engine_config_sample(config);
        else
                return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
        return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
        return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
        return config_enabled_bit(event->attr.config);
}
static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
{
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
        u64 enable;

        /*
         * Only some counters need the sampling timer.
         *
         * We start with a bitmask of all currently enabled events.
         */
        enable = pmu->enable;

        /*
         * Mask out all the ones which do not need the timer, or in
         * other words keep all the ones that could need the timer.
         */
        enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
                  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
                  ENGINE_SAMPLE_MASK;

        /*
         * When the GPU is idle per-engine counters do not need to be
         * running so clear those bits out.
         */
        if (!gpu_active)
                enable &= ~ENGINE_SAMPLE_MASK;
        /*
         * Also, when there is software busyness tracking available we do
         * not need the timer for the I915_SAMPLE_BUSY counter.
         */
        else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
                enable &= ~BIT(I915_SAMPLE_BUSY);

        /*
         * If some bits remain it means we need the sampling timer running.
         */
        return enable;
}
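/*
 * Sum the residency of all RC6 power states supported by the platform (RC6,
 * plus RC6p/RC6pp where present) into a single value.
 */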
static u64 __get_rc6(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        u64 val;

        val = intel_rc6_residency_ns(&gt->rc6,
                                     IS_VALLEYVIEW(i915) ?
                                     VLV_GT_RENDER_RC6 :
                                     GEN6_GT_GFX_RC6);

        if (HAS_RC6p(i915))
                val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6p);

        if (HAS_RC6pp(i915))
                val += intel_rc6_residency_ns(&gt->rc6, GEN6_GT_GFX_RC6pp);

        return val;
}
#if IS_ENABLED(CONFIG_PM)

static inline s64 ktime_since(const ktime_t kt)
{
        return ktime_to_ns(ktime_sub(ktime_get(), kt));
}
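/*
 * With runtime pm the device may be suspended when the counter is read, in
 * which case the residency registers are not accessible. The value is then
 * estimated as the last known residency plus the time spent asleep, and is
 * clamped so that the reported counter never moves backwards.
 */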
static u64 get_rc6(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;
        bool awake = false;
        u64 val;

        if (intel_gt_pm_get_if_awake(gt)) {
                val = __get_rc6(gt);
                intel_gt_pm_put_async(gt);
                awake = true;
        }

        spin_lock_irqsave(&pmu->lock, flags);

        if (awake) {
                pmu->sample[__I915_SAMPLE_RC6].cur = val;
        } else {
                /*
                 * We think we are runtime suspended.
                 *
                 * Report the delta from when the device was suspended to now,
                 * on top of the last known real value, as the approximated RC6
                 * counter value.
                 */
                val = ktime_since(pmu->sleep_last);
                val += pmu->sample[__I915_SAMPLE_RC6].cur;
        }

        if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
                val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
        else
                pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;

        spin_unlock_irqrestore(&pmu->lock, flags);

        return val;
}
static void park_rc6(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
                pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);

        pmu->sleep_last = ktime_get();
}
#else

static u64 get_rc6(struct intel_gt *gt)
{
        return __get_rc6(gt);
}

static void park_rc6(struct drm_i915_private *i915) {}

#endif
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
        if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
                pmu->timer_enabled = true;
                pmu->timer_last = ktime_get();
                hrtimer_start_range_ns(&pmu->timer,
                                       ns_to_ktime(PERIOD), 0,
                                       HRTIMER_MODE_REL_PINNED);
        }
}
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        spin_lock_irq(&pmu->lock);

        park_rc6(i915);

        /*
         * Signal sampling timer to stop if only engine events are enabled and
         * GPU went idle.
         */
        pmu->timer_enabled = pmu_needs_timer(pmu, false);

        spin_unlock_irq(&pmu->lock);
}
void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        spin_lock_irq(&pmu->lock);

        /*
         * Re-enable sampling timer when GPU goes active.
         */
        __i915_pmu_maybe_start_timer(pmu);

        spin_unlock_irq(&pmu->lock);
}
static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
        sample->cur += val;
}

static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
        /*
         * We have to avoid concurrent mmio cache line access on gen7 or
         * risk a machine hang. For a fun history lesson dig out the old
         * userspace intel_gpu_top and run it on Ivybridge or Haswell!
         */
        return IS_GEN(i915, 7);
}
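/*
 * Sample the busy/wait/sema state of each awake engine from its ring
 * registers. The registers are read without taking forcewake; a read which
 * lands outside of a powered-up well returns 0 and is treated as idle.
 */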
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
                return;

        if (!intel_gt_pm_is_awake(gt))
                return;

        for_each_engine(engine, gt, id) {
                struct intel_engine_pmu *pmu = &engine->pmu;
                spinlock_t *mmio_lock;
                unsigned long flags;
                bool busy;
                u32 val;

                if (!intel_engine_pm_get_if_awake(engine))
                        continue;

                mmio_lock = NULL;
                if (exclusive_mmio_access(i915))
                        mmio_lock = &engine->uncore->lock;

                if (unlikely(mmio_lock))
                        spin_lock_irqsave(mmio_lock, flags);

                val = ENGINE_READ_FW(engine, RING_CTL);
                if (val == 0) /* powerwell off => engine idle */
                        goto skip;

                if (val & RING_WAIT)
                        add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
                if (val & RING_WAIT_SEMAPHORE)
                        add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

                /* No need to sample when busy stats are supported. */
                if (intel_engine_supports_stats(engine))
                        goto skip;

                /*
                 * While waiting on a semaphore or event, MI_MODE reports the
                 * ring as idle. However, previously using the seqno, and with
                 * execlists sampling, we account for the ring waiting as the
                 * engine being busy. Therefore, we record the sample as being
                 * busy if either waiting or !idle.
                 */
                busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
                if (!busy) {
                        val = ENGINE_READ_FW(engine, RING_MI_MODE);
                        busy = !(val & MODE_IDLE);
                }
                if (busy)
                        add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);

skip:
                if (unlikely(mmio_lock))
                        spin_unlock_irqrestore(mmio_lock, flags);
                intel_engine_pm_put_async(engine);
        }
}
static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
        sample->cur += mul_u32_u32(val, mul);
}

static bool frequency_sampling_enabled(struct i915_pmu *pmu)
{
        return pmu->enable &
               (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
                config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
}
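/*
 * Accumulate the actual and requested GPU frequency weighted by the elapsed
 * sampling period, so the read side can divide the running total back down
 * to an average frequency in MHz.
 */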
static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;
        struct i915_pmu *pmu = &i915->pmu;
        struct intel_rps *rps = &gt->rps;

        if (!frequency_sampling_enabled(pmu))
                return;

        /* Report 0/0 (actual/requested) frequency while parked. */
        if (!intel_gt_pm_get_if_awake(gt))
                return;

        if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
                u32 val;

                /*
                 * We take a quick peek here without using forcewake
                 * so that we don't perturb the system under observation
                 * (forcewake => !rc6 => increased power use). We expect
                 * that if the read fails because it is outside of the
                 * mmio power well, then it will return 0 -- in which
                 * case we assume the system is running at the intended
                 * frequency. Fortunately, the read should rarely fail!
                 */
                val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
                if (val)
                        val = intel_rps_get_cagf(rps, val);
                else
                        val = rps->cur_freq;

                add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
                                intel_gpu_freq(rps, val), period_ns / 1000);
        }

        if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
                add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ],
                                intel_gpu_freq(rps, rps->cur_freq),
                                period_ns / 1000);
        }

        intel_gt_pm_put_async(gt);
}
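/* Sampling timer callback: take one sample of all software-tracked counters. */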
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
        struct drm_i915_private *i915 =
                container_of(hrtimer, struct drm_i915_private, pmu.timer);
        struct i915_pmu *pmu = &i915->pmu;
        struct intel_gt *gt = &i915->gt;
        unsigned int period_ns;
        ktime_t now;

        if (!READ_ONCE(pmu->timer_enabled))
                return HRTIMER_NORESTART;

        now = ktime_get();
        period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
        pmu->timer_last = now;

        /*
         * Strictly speaking the passed in period may not be 100% accurate for
         * all internal calculation, since some amount of time can be spent on
         * grabbing the forcewake. However the potential error from timer
         * callback delay greatly dominates this so we keep it simple.
         */
        engines_sample(gt, period_ns);
        frequency_sample(gt, period_ns);

        hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

        return HRTIMER_RESTART;
}
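/* Count device interrupts by summing the per-cpu statistics for our irq. */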
static u64 count_interrupts(struct drm_i915_private *i915)
{
        /* open-coded kstat_irqs() */
        struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
        u64 sum = 0;
        int cpu;

        if (!desc || !desc->kstat_irqs)
                return 0;

        for_each_possible_cpu(cpu)
                sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

        return sum;
}
static void engine_event_destroy(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct intel_engine_cs *engine;

        engine = intel_engine_lookup_user(i915,
                                          engine_event_class(event),
                                          engine_event_instance(event));
        if (WARN_ON_ONCE(!engine))
                return;

        if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
            intel_engine_supports_stats(engine))
                intel_disable_engine_stats(engine);
}
static void i915_pmu_event_destroy(struct perf_event *event)
{
        WARN_ON(event->parent);

        if (is_engine_event(event))
                engine_event_destroy(event);
}
static int
engine_event_status(struct intel_engine_cs *engine,
                    enum drm_i915_pmu_engine_sample sample)
{
        switch (sample) {
        case I915_SAMPLE_BUSY:
        case I915_SAMPLE_WAIT:
                break;
        case I915_SAMPLE_SEMA:
                if (INTEL_GEN(engine->i915) < 6)
                        return -ENODEV;
                break;
        default:
                return -ENOENT;
        }

        return 0;
}
static int
config_status(struct drm_i915_private *i915, u64 config)
{
        switch (config) {
        case I915_PMU_ACTUAL_FREQUENCY:
                if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
                        /* Requires a mutex for sampling! */
                        return -ENODEV;
                /* Fall-through. */
        case I915_PMU_REQUESTED_FREQUENCY:
                if (INTEL_GEN(i915) < 6)
                        return -ENODEV;
                break;
        case I915_PMU_INTERRUPTS:
                break;
        case I915_PMU_RC6_RESIDENCY:
                if (!HAS_RC6(i915))
                        return -ENODEV;
                break;
        default:
                return -ENOENT;
        }

        return 0;
}
static int engine_event_init(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct intel_engine_cs *engine;
        u8 sample;
        int ret;

        engine = intel_engine_lookup_user(i915, engine_event_class(event),
                                          engine_event_instance(event));
        if (!engine)
                return -ENODEV;

        sample = engine_event_sample(event);
        ret = engine_event_status(engine, sample);
        if (ret)
                return ret;

        if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
                ret = intel_enable_engine_stats(engine);

        return ret;
}
static int i915_pmu_event_init(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        int ret;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /* unsupported modes and filters */
        if (event->attr.sample_period) /* no sampling */
                return -EINVAL;

        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (event->cpu < 0)
                return -EINVAL;

        /* only allow running on one cpu at a time */
        if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
                return -EINVAL;

        if (is_engine_event(event))
                ret = engine_event_init(event);
        else
                ret = config_status(i915, event->attr.config);
        if (ret)
                return ret;

        if (!event->parent)
                event->destroy = i915_pmu_event_destroy;

        return 0;
}
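/*
 * Compute the current value of an event, either from the samples collected
 * by the timer, from the engine busy stats, or from the interrupt/RC6
 * counters for the non-engine events.
 */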
static u64 __i915_pmu_event_read(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        struct i915_pmu *pmu = &i915->pmu;
        u64 val = 0;

        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                if (WARN_ON_ONCE(!engine)) {
                        /* Do nothing */
                } else if (sample == I915_SAMPLE_BUSY &&
                           intel_engine_supports_stats(engine)) {
                        val = ktime_to_ns(intel_engine_get_busy_time(engine));
                } else {
                        val = engine->pmu.sample[sample].cur;
                }
        } else {
                switch (event->attr.config) {
                case I915_PMU_ACTUAL_FREQUENCY:
                        val =
                           div_u64(pmu->sample[__I915_SAMPLE_FREQ_ACT].cur,
                                   USEC_PER_SEC /* to MHz */);
                        break;
                case I915_PMU_REQUESTED_FREQUENCY:
                        val =
                           div_u64(pmu->sample[__I915_SAMPLE_FREQ_REQ].cur,
                                   USEC_PER_SEC /* to MHz */);
                        break;
                case I915_PMU_INTERRUPTS:
                        val = count_interrupts(i915);
                        break;
                case I915_PMU_RC6_RESIDENCY:
                        val = get_rc6(&i915->gt);
                        break;
                }
        }

        return val;
}
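/*
 * Publish an updated counter value: prev_count is updated with a cmpxchg so
 * that concurrent readers of the same event each account their delta exactly
 * once.
 */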
static void i915_pmu_event_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        u64 prev, new;

again:
        prev = local64_read(&hwc->prev_count);
        new = __i915_pmu_event_read(event);

        if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
                goto again;

        local64_add(new - prev, &event->count);
}
static void i915_pmu_enable(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        unsigned int bit = event_enabled_bit(event);
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;

        spin_lock_irqsave(&pmu->lock, flags);

        /*
         * Update the bitmask of enabled events and increment
         * the event reference counter.
         */
        BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
        GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
        GEM_BUG_ON(pmu->enable_count[bit] == ~0);
        pmu->enable |= BIT_ULL(bit);
        pmu->enable_count[bit]++;

        /*
         * Start the sampling timer if needed and not already enabled.
         */
        __i915_pmu_maybe_start_timer(pmu);

        /*
         * For per-engine events the bitmask and reference counting
         * is stored per engine.
         */
        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
                             I915_ENGINE_SAMPLE_COUNT);
                BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
                             I915_ENGINE_SAMPLE_COUNT);
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
                GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

                engine->pmu.enable |= BIT(sample);
                engine->pmu.enable_count[sample]++;
        }

        spin_unlock_irqrestore(&pmu->lock, flags);

        /*
         * Store the current counter value so we can report the correct delta
         * for all listeners. Even when the event was already enabled and has
         * an existing non-zero value.
         */
        local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}
static void i915_pmu_disable(struct perf_event *event)
{
        struct drm_i915_private *i915 =
                container_of(event->pmu, typeof(*i915), pmu.base);
        unsigned int bit = event_enabled_bit(event);
        struct i915_pmu *pmu = &i915->pmu;
        unsigned long flags;

        spin_lock_irqsave(&pmu->lock, flags);

        if (is_engine_event(event)) {
                u8 sample = engine_event_sample(event);
                struct intel_engine_cs *engine;

                engine = intel_engine_lookup_user(i915,
                                                  engine_event_class(event),
                                                  engine_event_instance(event));

                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
                GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
                GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

                /*
                 * Decrement the reference count and clear the enabled
                 * bitmask when the last listener on an event goes away.
                 */
                if (--engine->pmu.enable_count[sample] == 0)
                        engine->pmu.enable &= ~BIT(sample);
        }

        GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
        GEM_BUG_ON(pmu->enable_count[bit] == 0);
        /*
         * Decrement the reference count and clear the enabled
         * bitmask when the last listener on an event goes away.
         */
        if (--pmu->enable_count[bit] == 0) {
                pmu->enable &= ~BIT_ULL(bit);
                pmu->timer_enabled &= pmu_needs_timer(pmu, true);
        }

        spin_unlock_irqrestore(&pmu->lock, flags);
}
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
        i915_pmu_enable(event);
        event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_UPDATE)
                i915_pmu_event_read(event);
        i915_pmu_disable(event);
        event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_START)
                i915_pmu_event_start(event, flags);

        return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
        i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
        return 0;
}
struct i915_str_attribute {
        struct device_attribute attr;
        const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
{
        struct i915_str_attribute *eattr;

        eattr = container_of(attr, struct i915_str_attribute, attr);
        return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
        (&((struct i915_str_attribute[]) { \
                { .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
                  .str = _config, } \
        })[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
        I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
        NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
        .name = "format",
        .attrs = i915_pmu_format_attrs,
};
struct i915_ext_attribute {
        struct device_attribute attr;
        unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        struct i915_ext_attribute *eattr;

        eattr = container_of(attr, struct i915_ext_attribute, attr);
        return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
        .name = "events",
        /* Patch in attrs at runtime. */
};
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
                          struct device_attribute *attr,
                          char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
        .attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
        &i915_pmu_format_attr_group,
        &i915_pmu_events_attr_group,
        &i915_pmu_cpumask_attr_group,
        NULL
};
#define __event(__config, __name, __unit) \
{ \
        .config = (__config), \
        .name = (__name), \
        .unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
        .sample = (__sample), \
        .name = (__name), \
}
static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = name;
        attr->attr.attr.mode = 0444;
        attr->attr.show = i915_pmu_event_show;
        attr->val = config;

        return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
             const char *str)
{
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = name;
        attr->attr.attr.mode = 0444;
        attr->attr.show = perf_event_sysfs_show;
        attr->event_str = str;

        return ++attr;
}
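/*
 * Build the sysfs "events" attribute group at runtime, exposing one entry per
 * supported counter (e.g. rc6-residency, interrupts and the per-engine
 * <engine>-busy/sema/wait samples) together with their .unit attributes.
 */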
static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
        struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
        static const struct {
                u64 config;
                const char *name;
                const char *unit;
        } events[] = {
                __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
                __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
                __event(I915_PMU_INTERRUPTS, "interrupts", NULL),
                __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
        };
        static const struct {
                enum drm_i915_pmu_engine_sample sample;
                char *name;
        } engine_events[] = {
                __engine_event(I915_SAMPLE_BUSY, "busy"),
                __engine_event(I915_SAMPLE_SEMA, "sema"),
                __engine_event(I915_SAMPLE_WAIT, "wait"),
        };
        unsigned int count = 0;
        struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
        struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
        struct attribute **attr = NULL, **attr_iter;
        struct intel_engine_cs *engine;
        unsigned int i;

        /* Count how many counters we will be exposing. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                if (!config_status(i915, events[i].config))
                        count++;
        }

        for_each_uabi_engine(engine, i915) {
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        if (!engine_event_status(engine,
                                                 engine_events[i].sample))
                                count++;
                }
        }

        /* Allocate attribute objects and table. */
        i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
        if (!i915_attr)
                goto err_alloc;

        pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
        if (!pmu_attr)
                goto err_alloc;

        /* Max one pointer of each attribute type plus a termination entry. */
        attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
        if (!attr)
                goto err_alloc;

        i915_iter = i915_attr;
        pmu_iter = pmu_attr;
        attr_iter = attr;

        /* Initialize supported non-engine counters. */
        for (i = 0; i < ARRAY_SIZE(events); i++) {
                char *str;

                if (config_status(i915, events[i].config))
                        continue;

                str = kstrdup(events[i].name, GFP_KERNEL);
                if (!str)
                        goto err;

                *attr_iter++ = &i915_iter->attr.attr;
                i915_iter = add_i915_attr(i915_iter, str, events[i].config);

                if (events[i].unit) {
                        str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &pmu_iter->attr.attr;
                        pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
                }
        }

        /* Initialize supported engine counters. */
        for_each_uabi_engine(engine, i915) {
                for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
                        char *str;

                        if (engine_event_status(engine,
                                                engine_events[i].sample))
                                continue;

                        str = kasprintf(GFP_KERNEL, "%s-%s",
                                        engine->name, engine_events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &i915_iter->attr.attr;
                        i915_iter =
                                add_i915_attr(i915_iter, str,
                                              __I915_PMU_ENGINE(engine->uabi_class,
                                                                engine->uabi_instance,
                                                                engine_events[i].sample));

                        str = kasprintf(GFP_KERNEL, "%s-%s.unit",
                                        engine->name, engine_events[i].name);
                        if (!str)
                                goto err;

                        *attr_iter++ = &pmu_iter->attr.attr;
                        pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
                }
        }

        pmu->i915_attr = i915_attr;
        pmu->pmu_attr = pmu_attr;

        return attr;

err:
        for (attr_iter = attr; *attr_iter; attr_iter++)
                kfree((*attr_iter)->name);

err_alloc:
        kfree(attr);
        kfree(i915_attr);
        kfree(pmu_attr);

        return NULL;
}
static void free_event_attributes(struct i915_pmu *pmu)
{
        struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

        for (; *attr_iter; attr_iter++)
                kfree((*attr_iter)->name);

        kfree(i915_pmu_events_attr_group.attrs);
        kfree(pmu->i915_attr);
        kfree(pmu->pmu_attr);

        i915_pmu_events_attr_group.attrs = NULL;
        pmu->i915_attr = NULL;
        pmu->pmu_attr = NULL;
}
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

        GEM_BUG_ON(!pmu->base.event_init);

        /* Select the first online CPU as a designated reader. */
        if (!cpumask_weight(&i915_pmu_cpumask))
                cpumask_set_cpu(cpu, &i915_pmu_cpumask);

        return 0;
}
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
        struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
        unsigned int target;

        GEM_BUG_ON(!pmu->base.event_init);

        if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
                target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
                /* Migrate events if there is a valid target */
                if (target < nr_cpu_ids) {
                        cpumask_set_cpu(target, &i915_pmu_cpumask);
                        perf_pmu_migrate_context(&pmu->base, cpu, target);
                }
        }

        return 0;
}
static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
        enum cpuhp_state slot;
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                      "perf/x86/intel/i915:online",
                                      i915_pmu_cpu_online,
                                      i915_pmu_cpu_offline);
        if (ret < 0)
                return ret;

        slot = ret;
        ret = cpuhp_state_add_instance(slot, &pmu->node);
        if (ret) {
                cpuhp_remove_multi_state(slot);
                return ret;
        }

        cpuhp_slot = slot;
        return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
        WARN_ON(cpuhp_slot == CPUHP_INVALID);
        WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node));
        cpuhp_remove_multi_state(cpuhp_slot);
}
static bool is_igp(struct drm_i915_private *i915)
{
        struct pci_dev *pdev = i915->drm.pdev;

        /* IGP is 0000:00:02.0 */
        return pci_domain_nr(pdev->bus) == 0 &&
               pdev->bus->number == 0 &&
               PCI_SLOT(pdev->devfn) == 2 &&
               PCI_FUNC(pdev->devfn) == 0;
}
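/*
 * Register the PMU with perf. The integrated GPU keeps the plain "i915" name;
 * other devices get a name derived from their PCI device so multiple
 * instances can coexist, with ':' replaced since tools/perf reserves colons
 * as special.
 */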
void i915_pmu_register(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;
        int ret = -ENOMEM;

        if (INTEL_GEN(i915) <= 2) {
                dev_info(i915->drm.dev, "PMU not supported for this GPU.");
                return;
        }

        spin_lock_init(&pmu->lock);
        hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        pmu->timer.function = i915_sample;

        if (!is_igp(i915)) {
                pmu->name = kasprintf(GFP_KERNEL,
                                      "i915_%s",
                                      dev_name(i915->drm.dev));
                if (pmu->name) {
                        /* tools/perf reserves colons as special. */
                        strreplace((char *)pmu->name, ':', '_');
                }
        } else {
                pmu->name = "i915";
        }
        if (!pmu->name)
                goto err;

        i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
        if (!i915_pmu_events_attr_group.attrs)
                goto err_name;

        pmu->base.attr_groups = i915_pmu_attr_groups;
        pmu->base.task_ctx_nr = perf_invalid_context;
        pmu->base.event_init = i915_pmu_event_init;
        pmu->base.add = i915_pmu_event_add;
        pmu->base.del = i915_pmu_event_del;
        pmu->base.start = i915_pmu_event_start;
        pmu->base.stop = i915_pmu_event_stop;
        pmu->base.read = i915_pmu_event_read;
        pmu->base.event_idx = i915_pmu_event_event_idx;

        ret = perf_pmu_register(&pmu->base, pmu->name, -1);
        if (ret)
                goto err_attr;

        ret = i915_pmu_register_cpuhp_state(pmu);
        if (ret)
                goto err_unreg;

        return;

err_unreg:
        perf_pmu_unregister(&pmu->base);
err_attr:
        pmu->base.event_init = NULL;
        free_event_attributes(pmu);
err_name:
        if (!is_igp(i915))
                kfree(pmu->name);
err:
        dev_notice(i915->drm.dev, "Failed to register PMU!\n");
}
void i915_pmu_unregister(struct drm_i915_private *i915)
{
        struct i915_pmu *pmu = &i915->pmu;

        if (!pmu->base.event_init)
                return;

        WARN_ON(pmu->enable);

        hrtimer_cancel(&pmu->timer);

        i915_pmu_unregister_cpuhp_state(pmu);

        perf_pmu_unregister(&pmu->base);
        pmu->base.event_init = NULL;
        if (!is_igp(i915))
                kfree(pmu->name);
        free_event_attributes(pmu);
}