/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/irq.h>
#include "i915_pmu.h"
#include "intel_ringbuffer.h"
#include "i915_drv.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

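/*
 * Illustrative note (not from the original file): engine event configs are
 * packed by the uapi __I915_PMU_ENGINE() macro roughly as
 *
 *	config = (class << I915_PMU_CLASS_SHIFT) |
 *		 (instance << I915_PMU_SAMPLE_BITS) |
 *		 sample;
 *
 * so the helpers above simply unpack those fields again. As a hypothetical
 * example, assuming the field widths from the uapi header (4 sample bits,
 * 8 instance bits), a config of 0x2001 would decode as class 2, instance 0,
 * sample I915_SAMPLE_WAIT. Non-engine events start at __I915_PMU_OTHER(0)
 * and map to enable-mask bits above ENGINE_SAMPLE_BITS.
 */
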
static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		i915->pmu.timer_last = ktime_get();
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}

static void
engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		if (val)
			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
				   period_ns);

		if (val && (engine->pmu.enable &
			    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		if (val & RING_WAIT)
			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
				   period_ns);

		if (val & RING_WAIT_SEMAPHORE)
			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
				   period_ns);
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}

static void
add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
{
	sample->cur += mul_u32_u32(val, mul);
}

static void
frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
				intel_gpu_freq(dev_priv, val),
				period_ns / 1000);
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
				intel_gpu_freq(dev_priv,
					       dev_priv->gt_pm.rps.cur_freq),
				period_ns / 1000);
	}
}

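/*
 * Illustrative note (not from the original file): the frequency samples are
 * time-weighted. Each timer tick adds freq_MHz * (period_ns / 1000), i.e.
 * MHz * microseconds, and __i915_pmu_event_read() later divides by
 * USEC_PER_SEC. As a rough worked example, a GPU running at 350 MHz sampled
 * over one full second accumulates ~350 * 1000000, which reads back as ~350,
 * so the exported counter advances by the average frequency in MHz for every
 * second of wall time.
 */
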
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);
	unsigned int period_ns;
	ktime_t now;

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
	i915->pmu.timer_last = now;

	/*
	 * Strictly speaking the passed in period may not be 100% accurate for
	 * all internal calculation, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer call-
	 * back delay greatly dominates this so we keep it simple.
	 */
	engines_sample(i915, period_ns);
	frequency_sample(i915, period_ns);

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}

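/*
 * Illustrative sketch (not part of the original file): since this PMU uses
 * perf_invalid_context, events are opened system-wide (pid == -1) on a CPU
 * taken from the exported cpumask. A minimal userspace consumer might look
 * roughly like the following, assuming the usual sysfs layout for an uncore
 * PMU named "i915":
 *
 *	struct perf_event_attr attr = { 0 };
 *
 *	attr.type = read_u64("/sys/bus/event_source/devices/i915/type");
 *	attr.config = I915_PMU_RC6_RESIDENCY;
 *	attr.size = sizeof(attr);
 *
 *	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
 *	read(fd, &counter, sizeof(counter));
 *
 * where read_u64() is a hypothetical helper that parses the sysfs file.
 */
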
static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}

static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	unsigned long flags;
	u64 val;

	if (intel_runtime_pm_get_if_in_use(i915)) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(i915);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct pci_dev *pdev = i915->drm.pdev;
		struct device *kdev = &pdev->dev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);
		spin_lock(&kdev->power.lock);

		/*
		 * After the above branch, where intel_runtime_pm_get_if_in_use
		 * failed to get the runtime PM reference, we cannot assume we
		 * are in runtime suspend since we can either: a) race with
		 * coming out of it before we took the power.lock, or b) there
		 * are other states than suspended which can bring us here.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and if not we cannot do better than report the last
		 * known RC6 value.
		 */
		if (kdev->power.runtime_status == RPM_SUSPENDED) {
			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_jiffies_last =
						  kdev->power.suspended_jiffies;

			val = kdev->power.suspended_jiffies -
			      i915->pmu.suspended_jiffies_last;
			val += jiffies - kdev->power.accounting_timestamp;

			val = jiffies_to_nsecs(val);
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock(&kdev->power.lock);
		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

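/*
 * Illustrative note (not from the original file): with the groups above a
 * registered PMU is expected to show up under
 * /sys/bus/event_source/devices/i915/ with "format", "events" and "cpumask"
 * entries, so something along the lines of
 *
 *	perf stat -e i915/rc6-residency/ -a sleep 1
 *
 * can resolve event names via the generated "events" attributes.
 */
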
#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}

static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

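/*
 * Illustrative note (not from the original file): for each engine this
 * generates one config attribute and one unit attribute per supported
 * sample, e.g. a "<engine>-busy" event plus a "<engine>-busy.unit" file
 * containing "ns", alongside the non-engine events such as
 * "actual-frequency" with unit "M".
 */
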
static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}

static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}

void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
}