// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"
/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
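
/*
 * The tests below substitute this no-op for the real RPS worker
 * (rps->work.func) so that background frequency management cannot adjust
 * the clocks behind the test's back while it is measuring.
 */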
static void dummy_rps_work(struct work_struct *wrk)
{
}
static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}
static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}
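
/*
 * Build a batch that spins forever, incrementing a counter held in a CS GPR
 * (COUNT += INC) inside a long unrolled MI_MATH loop and, when srm is set,
 * storing the running count to the last dword of the buffer so the CPU can
 * sample it. The caller terminates the spin by writing MI_BATCH_BUFFER_END
 * through the returned *cancel pointer.
 */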
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unlock;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
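
/*
 * Poll the actual (CAGF) frequency until it reaches the target, the timeout
 * expires, or the value stops changing across the recent sample history,
 * backing off the polling interval as we go.
 */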
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}
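
/*
 * Request a new frequency under the rps lock, then give the PCU up to 50ms
 * to settle and return the frequency it actually granted.
 */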
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	intel_rps_set(rps, freq);
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}
static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (IS_GEN(i915, 9)) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}
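
/*
 * Sanity check the GT clock <-> ns conversion helpers: count RP
 * evaluation-interval (C0) cycles over a measured walltime and verify that
 * intel_gt_pm_interval_to_ns()/intel_gt_ns_to_pm_interval() agree with the
 * observation to within the 8:10 bounds checked below.
 */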
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI), 10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}
int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		u8 f, limit;
		u8 min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (INTEL_GEN(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		sandybridge_pcode_read(i915,
				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
				       &ia_freq, NULL);

		pr_info("%5d %5d %5d\n",
			intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}
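
/*
 * Sample the memory-visible loop counter over a sleep of duration_ms and
 * return its increment rate; with dt in ns, 1000 * 1000 * dc / dt is
 * increments per millisecond, i.e. the kHz figure reported by the callers.
 */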
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}
static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}
static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
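
/*
 * scaled_within(x, y, f_n, f_d) accepts x if f_n/f_d < x/y < f_d/f_n;
 * e.g. f_n=9, f_d=10 requires x to lie within roughly 10% of y.
 */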
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}
int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
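
/*
 * Let any partially-elapsed evaluation interval expire, clear the
 * accumulated RPS interrupt state, and then sleep for at least one full EI
 * so that only the interrupts raised during that window are observed.
 */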
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}
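
/*
 * Pin the GPU at its minimum frequency, keep the engine busy with a spinner
 * for one evaluation interval and check that an UP-threshold interrupt was
 * raised (the frequency itself must not change, since the real worker has
 * been replaced by dummy_rps_work).
 */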
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}
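
/*
 * Park the GPU at its maximum frequency with the engine idle (rc6 disabled
 * by the caller) for one evaluation interval and check that a DOWN-threshold
 * or DOWN-timeout interrupt was raised.
 */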
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}
int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || INTEL_GEN(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
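
/*
 * Sample RAPL package energy (uJ) over a sleep of duration_ms; with dt in
 * ns, 1000 * 1000 * dE / dt is microjoules per millisecond, i.e. the average
 * power in mW reported by live_rps_power().
 */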
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}
static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that claim.
	 */

	if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
		return 0;

	if (!librapl_energy_uJ())
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			break;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}