/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_rps.h"
#include "intel_sideband.h"
#include "../../../platform/x86/intel_ips.h"
/*
 * Lock protecting IPS related data structures
 */
static DEFINE_SPINLOCK(mchdev_lock);

static struct intel_gt *rps_to_gt(struct intel_rps *rps)
{
	return container_of(rps, struct intel_gt, rps);
}

static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
{
	return rps_to_gt(rps)->i915;
}

static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
{
	return rps_to_gt(rps)->uncore;
}

static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
{
	return mask & ~rps->pm_intrmsk_mbz;
}
static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
}
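/*
 * Note on set(): intel_uncore_write_fw() skips the forcewake bookkeeping, so
 * this helper is only suitable for the RPS registers written in this file,
 * where the caller either already holds the required forcewake or the
 * register does not need it.
 */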
static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
	u32 mask = 0;

	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
	if (val > rps->min_freq_softlimit)
		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
			 GEN6_PM_RP_DOWN_THRESHOLD |
			 GEN6_PM_RP_DOWN_TIMEOUT);

	if (val < rps->max_freq_softlimit)
		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;

	mask &= rps->pm_events;

	return rps_pm_sanitize_mask(rps, ~mask);
}
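/*
 * rps_pm_mask() returns the value written to GEN6_PMINTRMSK, which is a mask
 * register: a set bit disables the corresponding PM interrupt. Hence the
 * events we care about are collected in @mask and then inverted, with
 * rps_pm_sanitize_mask() clearing any must-be-zero bits for the platform.
 */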
static void rps_reset_ei(struct intel_rps *rps)
{
	memset(&rps->ei, 0, sizeof(rps->ei));
}

static void rps_enable_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	rps_reset_ei(rps);

	if (IS_VALLEYVIEW(gt->i915))
		/* WaGsvRC0ResidencyMethod:vlv */
		rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
	else
		rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
				  GEN6_PM_RP_DOWN_THRESHOLD |
				  GEN6_PM_RP_DOWN_TIMEOUT);

	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_enable_irq(gt, rps->pm_events);
	spin_unlock_irq(&gt->irq_lock);

	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
}

static void gen6_rps_reset_interrupts(struct intel_rps *rps)
{
	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
}

static void gen11_rps_reset_interrupts(struct intel_rps *rps)
{
	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
		;
}

static void rps_reset_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	spin_lock_irq(&gt->irq_lock);
	if (INTEL_GEN(gt->i915) >= 11)
		gen11_rps_reset_interrupts(rps);
	else
		gen6_rps_reset_interrupts(rps);

	rps->pm_iir = 0;
	spin_unlock_irq(&gt->irq_lock);
}

static void rps_disable_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	rps->pm_events = 0;

	set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));

	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
	spin_unlock_irq(&gt->irq_lock);

	intel_synchronize_irq(gt->i915);

	/*
	 * Now that we will not be generating any more work, flush any
	 * outstanding tasks. As we are called on the RPS idle path,
	 * we will reset the GPU to minimum frequencies, so the current
	 * state of the worker can be discarded.
	 */
	cancel_work_sync(&rps->work);

	rps_reset_interrupts(rps);
}
static const struct cparams {
	u16 i;
	u16 t;
	u16 m;
	u16 c;
} cparams[] = {
	{ 1, 1333, 301, 28664 },
	{ 1, 1066, 294, 24460 },
	{ 1, 800, 294, 25192 },
	{ 0, 1333, 276, 27605 },
	{ 0, 1066, 276, 27605 },
	{ 0, 800, 231, 23784 },
};
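/*
 * Each cparams entry maps an FSB class (i) and memory frequency (t, in MHz)
 * to the empirically derived slope (m) and offset (c) that
 * __ips_chipset_val() below uses to turn energy counter deltas into a
 * chipset power estimate.
 */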
static void gen5_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u8 fmax, fmin, fstart;
	u32 rgvmodectl;
	int c_m, i;

	if (i915->fsb_freq <= 3200)
		c_m = 0;
	else if (i915->fsb_freq <= 4800)
		c_m = 1;
	else
		c_m = 2;

	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
			rps->ips.m = cparams[i].m;
			rps->ips.c = cparams[i].c;
			break;
		}
	}

	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);

	/* Set up min, max, and cur for interrupt handling */
	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
		MEMMODE_FSTART_SHIFT;
	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
			 fmax, fmin, fstart);

	rps->min_freq = fmax;
	rps->max_freq = fmin;

	rps->idle_freq = rps->min_freq;
	rps->cur_freq = rps->idle_freq;
}
static unsigned long
__ips_chipset_val(struct intel_ips *ips)
{
	struct intel_uncore *uncore =
		rps_to_uncore(container_of(ips, struct intel_rps, ips));
	unsigned long now = jiffies_to_msecs(jiffies), dt;
	unsigned long result;
	u64 total, delta;

	lockdep_assert_held(&mchdev_lock);

	/*
	 * Prevent division-by-zero if we are asking too fast.
	 * Also, we don't get interesting results if we are polling
	 * faster than once in 10ms, so just return the saved value
	 * in such cases.
	 */
	dt = now - ips->last_time1;
	if (dt <= 10)
		return ips->chipset_power;

	/* FIXME: handle per-counter overflow */
	total = intel_uncore_read(uncore, DMIEC);
	total += intel_uncore_read(uncore, DDREC);
	total += intel_uncore_read(uncore, CSIEC);

	delta = total - ips->last_count1;

	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);

	ips->last_count1 = total;
	ips->last_time1 = now;

	ips->chipset_power = result;

	return result;
}
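/*
 * In other words, the chipset power estimate above is
 * (m * delta_counts / dt_ms + c) / 10, with m and c taken from the cparams
 * table and the counter delta summed over the DMIEC, DDREC and CSIEC energy
 * counters since the previous sample.
 */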
static unsigned long ips_mch_val(struct intel_uncore *uncore)
{
	unsigned int m, x, b;
	u32 tsfs;

	tsfs = intel_uncore_read(uncore, TSFS);
	x = intel_uncore_read8(uncore, TR1);

	b = tsfs & TSFS_INTR_MASK;
	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;

	return m * x / 127 - b;
}

static int _pxvid_to_vd(u8 pxvid)
{
	if (pxvid == 0)
		return 0;

	if (pxvid >= 8 && pxvid < 31)
		pxvid = 31;

	return (pxvid + 2) * 125;
}

static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
{
	const int vd = _pxvid_to_vd(pxvid);

	if (INTEL_INFO(i915)->is_mobile)
		return max(vd - 1125, 0);

	return vd;
}

static void __gen5_ips_update(struct intel_ips *ips)
{
	struct intel_uncore *uncore =
		rps_to_uncore(container_of(ips, struct intel_rps, ips));
	u64 now, delta, dt;
	u32 count;

	lockdep_assert_held(&mchdev_lock);

	now = ktime_get_raw_ns();
	dt = now - ips->last_time2;
	do_div(dt, NSEC_PER_MSEC);

	/* Don't divide by 0 */
	if (dt <= 10)
		return;

	count = intel_uncore_read(uncore, GFXEC);
	delta = count - ips->last_count2;

	ips->last_count2 = count;
	ips->last_time2 = now;

	/* More magic constants... */
	ips->gfx_power = div_u64(delta * 1181, dt * 10);
}

static void gen5_rps_update(struct intel_rps *rps)
{
	spin_lock_irq(&mchdev_lock);
	__gen5_ips_update(&rps->ips);
	spin_unlock_irq(&mchdev_lock);
}
static bool gen5_rps_set(struct intel_rps *rps, u8 val)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u16 rgvswctl;

	lockdep_assert_held(&mchdev_lock);

	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
	if (rgvswctl & MEMCTL_CMD_STS) {
		DRM_DEBUG("gpu busy, RCS change rejected\n");
		return false; /* still busy with another command */
	}

	/* Invert the frequency bin into an ips delay */
	val = rps->max_freq - val;
	val = rps->min_freq + val;

	rgvswctl =
		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
		(val << MEMCTL_FREQ_SHIFT) |
		MEMCTL_SFCAVM;
	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
	intel_uncore_posting_read16(uncore, MEMSWCTL);

	rgvswctl |= MEMCTL_CMD_STS;
	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);

	return true;
}
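/*
 * Note the double mapping above: gen5_rps_init() stores the hardware fmax in
 * rps->min_freq and fmin in rps->max_freq, because the MEMSWCTL delay value
 * runs in the opposite direction to the RPS frequency scale. Translating
 * through max_freq and min_freq converts an RPS frequency index back into
 * the delay bin the hardware expects.
 */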
static unsigned long intel_pxfreq(u32 vidfreq)
{
	int div = (vidfreq & 0x3f0000) >> 16;
	int post = (vidfreq & 0x3000) >> 12;
	int pre = (vidfreq & 0x7);

	if (!pre)
		return 0;

	return div * 133333 / (pre << post);
}

static unsigned int init_emon(struct intel_uncore *uncore)
{
	u8 pxw[16];
	int i;

	/* Disable to program */
	intel_uncore_write(uncore, ECR, 0);
	intel_uncore_posting_read(uncore, ECR);

	/* Program energy weights for various events */
	intel_uncore_write(uncore, SDEW, 0x15040d00);
	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
	intel_uncore_write(uncore, CSIEW2, 0x04000004);

	for (i = 0; i < 5; i++)
		intel_uncore_write(uncore, PEW(i), 0);
	for (i = 0; i < 3; i++)
		intel_uncore_write(uncore, DEW(i), 0);

	/* Program P-state weights to account for frequency power adjustment */
	for (i = 0; i < 16; i++) {
		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
		unsigned int freq = intel_pxfreq(pxvidfreq);
		unsigned int vid =
			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
		unsigned int val;

		val = vid * vid * freq / 1000 * 255;
		val /= 127 * 127 * 900;

		pxw[i] = val;
	}
	/* Render standby states get 0 weight */
	pxw[14] = 0;
	pxw[15] = 0;

	for (i = 0; i < 4; i++) {
		intel_uncore_write(uncore, PXW(i),
				   pxw[i * 4 + 0] << 24 |
				   pxw[i * 4 + 1] << 16 |
				   pxw[i * 4 + 2] << 8 |
				   pxw[i * 4 + 3] << 0);
	}

	/* Adjust magic regs to magic values (more experimental results) */
	intel_uncore_write(uncore, OGW0, 0);
	intel_uncore_write(uncore, OGW1, 0);
	intel_uncore_write(uncore, EG0, 0x00007f00);
	intel_uncore_write(uncore, EG1, 0x0000000e);
	intel_uncore_write(uncore, EG2, 0x000e0000);
	intel_uncore_write(uncore, EG3, 0x68000300);
	intel_uncore_write(uncore, EG4, 0x42000000);
	intel_uncore_write(uncore, EG5, 0x00140031);
	intel_uncore_write(uncore, EG6, 0);
	intel_uncore_write(uncore, EG7, 0);

	for (i = 0; i < 8; i++)
		intel_uncore_write(uncore, PXWL(i), 0);

	/* Enable PMON + select events */
	intel_uncore_write(uncore, ECR, 0x80000019);

	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
}
static bool gen5_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u8 fstart, vstart;
	u32 rgvmodectl;

	spin_lock_irq(&mchdev_lock);

	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);

	/* Enable temp reporting */
	intel_uncore_write16(uncore, PMMISC,
			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
	intel_uncore_write16(uncore, TSC1,
			     intel_uncore_read16(uncore, TSC1) | TSE);

	/* 100ms RC evaluation intervals */
	intel_uncore_write(uncore, RCUPEI, 100000);
	intel_uncore_write(uncore, RCDNEI, 100000);

	/* Set max/min thresholds to 90ms and 80ms respectively */
	intel_uncore_write(uncore, RCBMAXAVG, 90000);
	intel_uncore_write(uncore, RCBMINAVG, 80000);

	intel_uncore_write(uncore, MEMIHYST, 1);

	/* Set up min, max, and cur for interrupt handling */
	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
		MEMMODE_FSTART_SHIFT;

	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;

	intel_uncore_write(uncore,
			   MEMINTREN,
			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);

	intel_uncore_write(uncore, VIDSTART, vstart);
	intel_uncore_posting_read(uncore, VIDSTART);

	rgvmodectl |= MEMMODE_SWMODE_EN;
	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);

	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
			     MEMCTL_CMD_STS) == 0, 10))
		DRM_ERROR("stuck trying to change perf mode\n");
	mdelay(1);

	gen5_rps_set(rps, rps->cur_freq);

	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
	rps->ips.last_time1 = jiffies_to_msecs(jiffies);

	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
	rps->ips.last_time2 = ktime_get_raw_ns();

	spin_unlock_irq(&mchdev_lock);

	rps->ips.corr = init_emon(uncore);

	return true;
}
static void gen5_rps_disable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u16 rgvswctl;

	spin_lock_irq(&mchdev_lock);

	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);

	/* Ack interrupts, disable EFC interrupt */
	intel_uncore_write(uncore, MEMINTREN,
			   intel_uncore_read(uncore, MEMINTREN) &
			   ~MEMINT_EVAL_CHG_EN);
	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
	intel_uncore_write(uncore, DEIER,
			   intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
	intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
	intel_uncore_write(uncore, DEIMR,
			   intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);

	/* Go back to the starting frequency */
	gen5_rps_set(rps, rps->idle_freq);
	mdelay(1);
	rgvswctl |= MEMCTL_CMD_STS;
	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
	mdelay(1);

	spin_unlock_irq(&mchdev_lock);
}

static u32
rps_limits(struct intel_rps *rps, u8 val)
{
	u32 limits;

	/*
	 * Only set the down limit when we've reached the lowest level to avoid
	 * getting more interrupts, otherwise leave this clear. This prevents a
	 * race in the hw when coming out of rc6: There's a tiny window where
	 * the hw runs at the minimal clock before selecting the desired
	 * frequency, if the down threshold expires in that window we will not
	 * receive a down interrupt.
	 */
	if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
		limits = rps->max_freq_softlimit << 23;
		if (val <= rps->min_freq_softlimit)
			limits |= rps->min_freq_softlimit << 14;
	} else {
		limits = rps->max_freq_softlimit << 24;
		if (val <= rps->min_freq_softlimit)
			limits |= rps->min_freq_softlimit << 16;
	}

	return limits;
}
static void rps_set_power(struct intel_rps *rps, int new_power)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 threshold_up = 0, threshold_down = 0; /* in % */
	u32 ei_up = 0, ei_down = 0;

	lockdep_assert_held(&rps->power.mutex);

	if (new_power == rps->power.mode)
		return;

	/* Note the units here are not exactly 1us, but 1280ns. */
	switch (new_power) {
	case LOW_POWER:
		/* Upclock if more than 95% busy over 16ms */
		ei_up = 16000;
		threshold_up = 95;

		/* Downclock if less than 85% busy over 32ms */
		ei_down = 32000;
		threshold_down = 85;
		break;

	case BETWEEN:
		/* Upclock if more than 90% busy over 13ms */
		ei_up = 13000;
		threshold_up = 90;

		/* Downclock if less than 75% busy over 32ms */
		ei_down = 32000;
		threshold_down = 75;
		break;

	case HIGH_POWER:
		/* Upclock if more than 85% busy over 10ms */
		ei_up = 10000;
		threshold_up = 85;

		/* Downclock if less than 60% busy over 32ms */
		ei_down = 32000;
		threshold_down = 60;
		break;
	}

	/* When byt can survive without system hang with dynamic
	 * sw freq adjustments, this restriction can be lifted.
	 */
	if (IS_VALLEYVIEW(i915))
		goto skip_hw_write;

	set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
	set(uncore, GEN6_RP_UP_THRESHOLD,
	    GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));

	set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
	set(uncore, GEN6_RP_DOWN_THRESHOLD,
	    GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));

	set(uncore, GEN6_RP_CONTROL,
	    (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
	    GEN6_RP_MEDIA_IS_GFX |
	    GEN6_RP_ENABLE |
	    GEN6_RP_UP_BUSY_AVG |
	    GEN6_RP_DOWN_IDLE_AVG);

skip_hw_write:
	rps->power.mode = new_power;
	rps->power.up_threshold = threshold_up;
	rps->power.down_threshold = threshold_down;
}
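/*
 * The thresholds programmed above are expressed as a percentage of the
 * up/down evaluation interval: GEN6_RP_UP_THRESHOLD is ei_up * threshold_up
 * / 100 and likewise for the down direction, with GT_INTERVAL_FROM_US()
 * translating microseconds into the ~1280ns units the hardware counts in.
 */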
static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
{
	int new_power;

	new_power = rps->power.mode;
	switch (rps->power.mode) {
	case LOW_POWER:
		if (val > rps->efficient_freq + 1 &&
		    val > rps->cur_freq)
			new_power = BETWEEN;
		break;

	case BETWEEN:
		if (val <= rps->efficient_freq &&
		    val < rps->cur_freq)
			new_power = LOW_POWER;
		else if (val >= rps->rp0_freq &&
			 val > rps->cur_freq)
			new_power = HIGH_POWER;
		break;

	case HIGH_POWER:
		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
		    val < rps->cur_freq)
			new_power = BETWEEN;
		break;
	}
	/* Max/min bins are special */
	if (val <= rps->min_freq_softlimit)
		new_power = LOW_POWER;
	if (val >= rps->max_freq_softlimit)
		new_power = HIGH_POWER;

	mutex_lock(&rps->power.mutex);
	if (rps->power.interactive)
		new_power = HIGH_POWER;
	rps_set_power(rps, new_power);
	mutex_unlock(&rps->power.mutex);
}

void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
{
	mutex_lock(&rps->power.mutex);
	if (interactive) {
		if (!rps->power.interactive++ && rps->active)
			rps_set_power(rps, HIGH_POWER);
	} else {
		GEM_BUG_ON(!rps->power.interactive);
		rps->power.interactive--;
	}
	mutex_unlock(&rps->power.mutex);
}

static int gen6_rps_set(struct intel_rps *rps, u8 val)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 swreq;

	if (INTEL_GEN(i915) >= 9)
		swreq = GEN9_FREQUENCY(val);
	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
		swreq = HSW_FREQUENCY(val);
	else
		swreq = (GEN6_FREQUENCY(val) |
			 GEN6_OFFSET(0) |
			 GEN6_AGGRESSIVE_TURBO);
	set(uncore, GEN6_RPNSWREQ, swreq);

	return 0;
}
static int vlv_rps_set(struct intel_rps *rps, u8 val)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	int err;

	vlv_punit_get(i915);
	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
	vlv_punit_put(i915);

	return err;
}

static int rps_set(struct intel_rps *rps, u8 val, bool update)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	int err;

	if (INTEL_GEN(i915) < 6)
		return 0;

	if (val == rps->last_freq)
		return 0;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
		err = vlv_rps_set(rps, val);
	else
		err = gen6_rps_set(rps, val);
	if (err)
		return err;

	if (update)
		gen6_rps_set_thresholds(rps, val);
	rps->last_freq = val;

	return 0;
}

void intel_rps_unpark(struct intel_rps *rps)
{
	u8 freq;

	if (!rps->enabled)
		return;

	/*
	 * Use the user's desired frequency as a guide, but for better
	 * performance, jump directly to RPe as our starting frequency.
	 */
	mutex_lock(&rps->lock);
	rps->active = true;
	freq = max(rps->cur_freq, rps->efficient_freq),
	freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
	intel_rps_set(rps, freq);
	rps->last_adj = 0;
	mutex_unlock(&rps->lock);

	if (INTEL_GEN(rps_to_i915(rps)) >= 6)
		rps_enable_interrupts(rps);

	if (IS_GEN(rps_to_i915(rps), 5))
		gen5_rps_update(rps);
}
void intel_rps_park(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (!rps->enabled)
		return;

	if (INTEL_GEN(i915) >= 6)
		rps_disable_interrupts(rps);

	rps->active = false;
	if (rps->last_freq <= rps->idle_freq)
		return;

	/*
	 * The punit delays the write of the frequency and voltage until it
	 * determines the GPU is awake. During normal usage we don't want to
	 * waste power changing the frequency if the GPU is sleeping (rc6).
	 * However, the GPU and driver is now idle and we do not want to delay
	 * switching to minimum voltage (reducing power whilst idle) as we do
	 * not expect to be woken in the near future and so must flush the
	 * change by waking the device.
	 *
	 * We choose to take the media powerwell (either would do to trick the
	 * punit into committing the voltage change) as that takes a lot less
	 * power than the render powerwell.
	 */
	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
	rps_set(rps, rps->idle_freq, false);
	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
}

void intel_rps_boost(struct i915_request *rq)
{
	struct intel_rps *rps = &rq->engine->gt->rps;
	unsigned long flags;

	if (i915_request_signaled(rq) || !rps->active)
		return;

	/* Serializes with i915_request_retire() */
	spin_lock_irqsave(&rq->lock, flags);
	if (!i915_request_has_waitboost(rq) &&
	    !dma_fence_is_signaled_locked(&rq->fence)) {
		set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);

		if (!atomic_fetch_inc(&rps->num_waiters) &&
		    READ_ONCE(rps->cur_freq) < rps->boost_freq)
			schedule_work(&rps->work);

		atomic_inc(&rps->boosts);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

int intel_rps_set(struct intel_rps *rps, u8 val)
{
	int err;

	lockdep_assert_held(&rps->lock);
	GEM_BUG_ON(val > rps->max_freq);
	GEM_BUG_ON(val < rps->min_freq);

	if (rps->active) {
		err = rps_set(rps, val, true);
		if (err)
			return err;

		/*
		 * Make sure we continue to get interrupts
		 * until we hit the minimum or maximum frequencies.
		 */
		if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
			struct intel_uncore *uncore = rps_to_uncore(rps);

			set(uncore,
			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));

			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
		}
	}

	rps->cur_freq = val;
	return 0;
}
static void gen6_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);

	/* All of these values are in units of 50MHz */

	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
	if (IS_GEN9_LP(i915)) {
		u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);

		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
		rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
		rps->min_freq = (rp_state_cap >> 0) & 0xff;
	} else {
		u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);

		rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
		rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
		rps->min_freq = (rp_state_cap >> 16) & 0xff;
	}

	/* hw_max = RP0 until we check for overclocking */
	rps->max_freq = rps->rp0_freq;

	rps->efficient_freq = rps->rp1_freq;
	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
	    IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
		u32 ddcc_status = 0;

		if (sandybridge_pcode_read(i915,
					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
					   &ddcc_status, NULL) == 0)
			rps->efficient_freq =
				clamp_t(u8,
					(ddcc_status >> 8) & 0xff,
					rps->min_freq,
					rps->max_freq);
	}

	if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
		/* Store the frequency values in 16.66 MHZ units, which is
		 * the natural hardware unit for SKL
		 */
		rps->rp0_freq *= GEN9_FREQ_SCALER;
		rps->rp1_freq *= GEN9_FREQ_SCALER;
		rps->min_freq *= GEN9_FREQ_SCALER;
		rps->max_freq *= GEN9_FREQ_SCALER;
		rps->efficient_freq *= GEN9_FREQ_SCALER;
	}
}

static bool rps_reset(struct intel_rps *rps)
{
	/* force a reset */
	rps->power.mode = -1;
	rps->last_freq = -1;

	if (rps_set(rps, rps->min_freq, true)) {
		DRM_ERROR("Failed to reset RPS to initial values\n");
		return false;
	}

	rps->cur_freq = rps->min_freq;
	return true;
}
/* See the Gen9_GT_PM_Programming_Guide doc for the below */
static bool gen9_rps_enable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);

	/* Program defaults and thresholds for RPS */
	if (IS_GEN(i915, 9))
		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
				      GEN9_FREQUENCY(rps->rp1_freq));

	/* 1 second timeout */
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
			      GT_INTERVAL_FROM_US(i915, 1000000));

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);

	return rps_reset(rps);
}

static bool gen8_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
			      HSW_FREQUENCY(rps->rp1_freq));

	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
			      100000000 / 128); /* 1 second timeout */

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	return rps_reset(rps);
}

static bool gen6_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);

	/* Power down if completely idle for over 50ms */
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	return rps_reset(rps);
}

static int chv_rps_max_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);

	switch (RUNTIME_INFO(i915)->sseu.eu_total) {
	case 8:
		/* (2 * 4) config */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
		break;
	case 12:
		/* (2 * 6) config */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
		break;
	case 16:
		/* (2 * 8) config */
	default:
		/* Setting (2 * 8) Min RP0 for any other combination */
		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
		break;
	}

	return val & FB_GFX_FREQ_FUSE_MASK;
}

static int chv_rps_rpe_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;

	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
}

static int chv_rps_guar_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);

	return val & FB_GFX_FREQ_FUSE_MASK;
}

static u32 chv_rps_min_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;

	return val & FB_GFX_FREQ_FUSE_MASK;
}
static bool chv_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	/* 1: Program defaults and thresholds for RPS*/
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	/* 2: Enable RPS */
	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
			      GEN6_RP_MEDIA_IS_GFX |
			      GEN6_RP_ENABLE |
			      GEN6_RP_UP_BUSY_AVG |
			      GEN6_RP_DOWN_IDLE_AVG);

	/* Setting Fixed Bias */
	vlv_punit_get(i915);

	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);

	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);

	vlv_punit_put(i915);

	/* RPS code assumes GPLL is used */
	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");

	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

	return rps_reset(rps);
}

static int vlv_rps_guar_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rp1;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);

	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;

	return rp1;
}

static int vlv_rps_max_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rp0;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);

	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
	/* Clamp to max */
	rp0 = min_t(u32, rp0, 0xea);

	return rp0;
}

static int vlv_rps_rpe_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val, rpe;

	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;

	return rpe;
}

static int vlv_rps_min_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
	/*
	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
	 * to make sure it matches what Punit accepts.
	 */
	return max_t(u32, val, 0xc0);
}
static bool vlv_rps_enable(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);

	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);

	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
			      GEN6_RP_MEDIA_TURBO |
			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
			      GEN6_RP_MEDIA_IS_GFX |
			      GEN6_RP_ENABLE |
			      GEN6_RP_UP_BUSY_AVG |
			      GEN6_RP_DOWN_IDLE_CONT);

	vlv_punit_get(i915);

	/* Setting Fixed Bias */
	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);

	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);

	vlv_punit_put(i915);

	/* RPS code assumes GPLL is used */
	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");

	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

	return rps_reset(rps);
}

static unsigned long __ips_gfx_val(struct intel_ips *ips)
{
	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
	struct intel_uncore *uncore = rps_to_uncore(rps);
	unsigned long t, corr, state1, corr2, state2;
	u32 pxvid, ext_v;

	lockdep_assert_held(&mchdev_lock);

	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
	pxvid = (pxvid >> 24) & 0x7f;
	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);

	state1 = ext_v;

	/* Revel in the empirically derived constants */

	/* Correction factor in 1/100000 units */
	t = ips_mch_val(uncore);
	if (t > 80)
		corr = t * 2349 + 135940;
	else if (t >= 50)
		corr = t * 964 + 29317;
	else /* < 50 */
		corr = t * 301 + 1004;

	corr = corr * 150142 * state1 / 10000 - 78642;
	corr /= 100000;
	corr2 = corr * ips->corr;

	state2 = corr2 * state1 / 10000;
	state2 /= 100; /* convert to mW */

	__gen5_ips_update(ips);

	return ips->gfx_power + state2;
}
void intel_rps_enable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	struct intel_uncore *uncore = rps_to_uncore(rps);

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
	if (IS_CHERRYVIEW(i915))
		rps->enabled = chv_rps_enable(rps);
	else if (IS_VALLEYVIEW(i915))
		rps->enabled = vlv_rps_enable(rps);
	else if (INTEL_GEN(i915) >= 9)
		rps->enabled = gen9_rps_enable(rps);
	else if (INTEL_GEN(i915) >= 8)
		rps->enabled = gen8_rps_enable(rps);
	else if (INTEL_GEN(i915) >= 6)
		rps->enabled = gen6_rps_enable(rps);
	else if (IS_IRONLAKE_M(i915))
		rps->enabled = gen5_rps_enable(rps);
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	if (!rps->enabled)
		return;

	WARN_ON(rps->max_freq < rps->min_freq);
	WARN_ON(rps->idle_freq > rps->max_freq);

	WARN_ON(rps->efficient_freq < rps->min_freq);
	WARN_ON(rps->efficient_freq > rps->max_freq);
}

static void gen6_rps_disable(struct intel_rps *rps)
{
	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
}

void intel_rps_disable(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	rps->enabled = false;

	if (INTEL_GEN(i915) >= 6)
		gen6_rps_disable(rps);
	else if (IS_IRONLAKE_M(i915))
		gen5_rps_disable(rps);
}
static int byt_gpu_freq(struct intel_rps *rps, int val)
{
	/*
	 * N = val - 0xb7
	 * Slow = Fast = GPLL ref * N
	 */
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
}

static int byt_freq_opcode(struct intel_rps *rps, int val)
{
	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
}
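/*
 * On Valleyview the opcode 0xb7 therefore corresponds to 0 MHz and each step
 * above it adds gpll_ref_freq / 1000 MHz (gpll_ref_freq is kept in kHz), so
 * byt_gpu_freq() and byt_freq_opcode() are inverses of one another up to
 * rounding.
 */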
static int chv_gpu_freq(struct intel_rps *rps, int val)
{
	/*
	 * N = val / 2
	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
	 */
	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
}

static int chv_freq_opcode(struct intel_rps *rps, int val)
{
	/* CHV needs even values */
	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
}

int intel_gpu_freq(struct intel_rps *rps, int val)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (INTEL_GEN(i915) >= 9)
		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
					 GEN9_FREQ_SCALER);
	else if (IS_CHERRYVIEW(i915))
		return chv_gpu_freq(rps, val);
	else if (IS_VALLEYVIEW(i915))
		return byt_gpu_freq(rps, val);
	else
		return val * GT_FREQUENCY_MULTIPLIER;
}

int intel_freq_opcode(struct intel_rps *rps, int val)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (INTEL_GEN(i915) >= 9)
		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
					 GT_FREQUENCY_MULTIPLIER);
	else if (IS_CHERRYVIEW(i915))
		return chv_freq_opcode(rps, val);
	else if (IS_VALLEYVIEW(i915))
		return byt_freq_opcode(rps, val);
	else
		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}
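/*
 * Worked example (a sketch, using the usual definitions of
 * GT_FREQUENCY_MULTIPLIER == 50 and GEN9_FREQ_SCALER == 3): on a gen9 part a
 * request for 450 MHz becomes intel_freq_opcode() ==
 * DIV_ROUND_CLOSEST(450 * 3, 50) == 27 hardware units, and
 * intel_gpu_freq(rps, 27) == DIV_ROUND_CLOSEST(27 * 50, 3) == 450 MHz again;
 * pre-gen9 parts simply use 50 MHz units with no scaler.
 */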
static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	rps->gpll_ref_freq =
		vlv_get_cck_clock(i915, "GPLL ref",
				  CCK_GPLL_CLOCK_CONTROL,
				  i915->czclk_freq);

	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
}
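/*
 * gpll_ref_freq is reported in kHz (hence the "%d kHz" above); it is the
 * CCK-derived reference that the byt/chv frequency conversions earlier in
 * this file divide by 1000 to express results in MHz.
 */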
static void vlv_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	vlv_iosf_sb_get(i915,
			BIT(VLV_IOSF_SB_PUNIT) |
			BIT(VLV_IOSF_SB_NC) |
			BIT(VLV_IOSF_SB_CCK));

	vlv_init_gpll_ref_freq(rps);

	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
	switch ((val >> 6) & 3) {
	case 0:
	case 1:
		i915->mem_freq = 800;
		break;
	case 2:
		i915->mem_freq = 1066;
		break;
	case 3:
		i915->mem_freq = 1333;
		break;
	}
	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);

	rps->max_freq = vlv_rps_max_freq(rps);
	rps->rp0_freq = rps->max_freq;
	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->max_freq),
			 rps->max_freq);

	rps->efficient_freq = vlv_rps_rpe_freq(rps);
	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->efficient_freq),
			 rps->efficient_freq);

	rps->rp1_freq = vlv_rps_guar_freq(rps);
	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->rp1_freq),
			 rps->rp1_freq);

	rps->min_freq = vlv_rps_min_freq(rps);
	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->min_freq),
			 rps->min_freq);

	vlv_iosf_sb_put(i915,
			BIT(VLV_IOSF_SB_PUNIT) |
			BIT(VLV_IOSF_SB_NC) |
			BIT(VLV_IOSF_SB_CCK));
}

static void chv_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 val;

	vlv_iosf_sb_get(i915,
			BIT(VLV_IOSF_SB_PUNIT) |
			BIT(VLV_IOSF_SB_NC) |
			BIT(VLV_IOSF_SB_CCK));

	vlv_init_gpll_ref_freq(rps);

	val = vlv_cck_read(i915, CCK_FUSE_REG);

	switch ((val >> 2) & 0x7) {
	case 3:
		i915->mem_freq = 2000;
		break;
	default:
		i915->mem_freq = 1600;
		break;
	}
	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);

	rps->max_freq = chv_rps_max_freq(rps);
	rps->rp0_freq = rps->max_freq;
	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->max_freq),
			 rps->max_freq);

	rps->efficient_freq = chv_rps_rpe_freq(rps);
	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->efficient_freq),
			 rps->efficient_freq);

	rps->rp1_freq = chv_rps_guar_freq(rps);
	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->rp1_freq),
			 rps->rp1_freq);

	rps->min_freq = chv_rps_min_freq(rps);
	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
			 intel_gpu_freq(rps, rps->min_freq),
			 rps->min_freq);

	vlv_iosf_sb_put(i915,
			BIT(VLV_IOSF_SB_PUNIT) |
			BIT(VLV_IOSF_SB_NC) |
			BIT(VLV_IOSF_SB_CCK));

	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
		   rps->min_freq) & 1,
		  "Odd GPU freq values\n");
}
static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
{
	ei->ktime = ktime_get_raw();
	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
}

static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	const struct intel_rps_ei *prev = &rps->ei;
	struct intel_rps_ei now;
	u32 events = 0;

	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
		return 0;

	vlv_c0_read(uncore, &now);

	if (prev->ktime) {
		u64 time, c0;
		u32 render, media;

		time = ktime_us_delta(now.ktime, prev->ktime);

		time *= rps_to_i915(rps)->czclk_freq;

		/* Workload can be split between render + media,
		 * e.g. SwapBuffers being blitted in X after being rendered in
		 * mesa. To account for this we need to combine both engines
		 * into our activity counter.
		 */
		render = now.render_c0 - prev->render_c0;
		media = now.media_c0 - prev->media_c0;
		c0 = max(render, media);
		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */

		if (c0 > time * rps->power.up_threshold)
			events = GEN6_PM_RP_UP_THRESHOLD;
		else if (c0 < time * rps->power.down_threshold)
			events = GEN6_PM_RP_DOWN_THRESHOLD;
	}

	rps->ei = now;
	return events;
}
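/*
 * vlv_wa_c0_ei() implements WaGsvRC0ResidencyMethod: Valleyview only raises
 * GEN6_PM_RP_UP_EI_EXPIRED, so on each evaluation-interval expiry we compare
 * the render/media C0 residency accumulated since the last sample against
 * the elapsed time scaled by the current up/down thresholds, and synthesise
 * the UP/DOWN_THRESHOLD events that rps_work() expects.
 */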
static void rps_work(struct work_struct *work)
{
	struct intel_rps *rps = container_of(work, typeof(*rps), work);
	struct intel_gt *gt = rps_to_gt(rps);
	bool client_boost = false;
	int new_freq, adj, min, max;
	u32 pm_iir = 0;

	spin_lock_irq(&gt->irq_lock);
	pm_iir = fetch_and_zero(&rps->pm_iir);
	client_boost = atomic_read(&rps->num_waiters);
	spin_unlock_irq(&gt->irq_lock);

	/* Make sure we didn't queue anything we're not going to process. */
	if ((pm_iir & rps->pm_events) == 0 && !client_boost)
		goto out;

	mutex_lock(&rps->lock);

	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);

	adj = rps->last_adj;
	new_freq = rps->cur_freq;
	min = rps->min_freq_softlimit;
	max = rps->max_freq_softlimit;
	if (client_boost)
		max = rps->max_freq;
	if (client_boost && new_freq < rps->boost_freq) {
		new_freq = rps->boost_freq;
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
		if (adj > 0)
			adj *= 2;
		else /* CHV needs even encode values */
			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;

		if (new_freq >= rps->max_freq_softlimit)
			adj = 0;
	} else if (client_boost) {
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
		if (rps->cur_freq > rps->efficient_freq)
			new_freq = rps->efficient_freq;
		else if (rps->cur_freq > rps->min_freq_softlimit)
			new_freq = rps->min_freq_softlimit;
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
		if (adj < 0)
			adj *= 2;
		else /* CHV needs even encode values */
			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;

		if (new_freq <= rps->min_freq_softlimit)
			adj = 0;
	} else { /* unknown event */
		adj = 0;
	}

	rps->last_adj = adj;

	/*
	 * Limit deboosting and boosting to keep ourselves at the extremes
	 * when in the respective power modes (i.e. slowly decrease frequencies
	 * while in the HIGH_POWER zone and slowly increase frequencies while
	 * in the LOW_POWER zone). On idle, we will hit the timeout and drop
	 * to the next level quickly, and conversely if busy we expect to
	 * hit a waitboost and rapidly switch into max power.
	 */
	if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
	    (adj > 0 && rps->power.mode == LOW_POWER))
		rps->last_adj = 0;

	/* sysfs frequency interfaces may have snuck in while servicing the
	 * interrupt
	 */
	new_freq += adj;
	new_freq = clamp_t(int, new_freq, min, max);

	if (intel_rps_set(rps, new_freq)) {
		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
		rps->last_adj = 0;
	}

	mutex_unlock(&rps->lock);

out:
	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
	spin_unlock_irq(&gt->irq_lock);
}
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
	struct intel_gt *gt = rps_to_gt(rps);
	const u32 events = rps->pm_events & pm_iir;

	lockdep_assert_held(&gt->irq_lock);

	if (unlikely(!events))
		return;

	gen6_gt_pm_mask_irq(gt, events);

	rps->pm_iir |= events;
	schedule_work(&rps->work);
}

void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
	struct intel_gt *gt = rps_to_gt(rps);

	if (pm_iir & rps->pm_events) {
		spin_lock(&gt->irq_lock);
		gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
		rps->pm_iir |= pm_iir & rps->pm_events;
		schedule_work(&rps->work);
		spin_unlock(&gt->irq_lock);
	}

	if (INTEL_GEN(gt->i915) >= 8)
		return;

	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);

	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
}

void gen5_rps_irq_handler(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u32 busy_up, busy_down, max_avg, min_avg;
	u8 new_freq;

	spin_lock(&mchdev_lock);

	intel_uncore_write16(uncore,
			     MEMINTRSTS,
			     intel_uncore_read(uncore, MEMINTRSTS));

	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
	min_avg = intel_uncore_read(uncore, RCBMINAVG);

	/* Handle RCS change request from hw */
	new_freq = rps->cur_freq;
	if (busy_up > max_avg)
		new_freq++;
	else if (busy_down < min_avg)
		new_freq--;
	new_freq = clamp(new_freq,
			 rps->min_freq_softlimit,
			 rps->max_freq_softlimit);

	if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
		rps->cur_freq = new_freq;

	spin_unlock(&mchdev_lock);
}
void intel_rps_init_early(struct intel_rps *rps)
{
	mutex_init(&rps->lock);
	mutex_init(&rps->power.mutex);

	INIT_WORK(&rps->work, rps_work);

	atomic_set(&rps->num_waiters, 0);
}

void intel_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_CHERRYVIEW(i915))
		chv_rps_init(rps);
	else if (IS_VALLEYVIEW(i915))
		vlv_rps_init(rps);
	else if (INTEL_GEN(i915) >= 6)
		gen6_rps_init(rps);
	else if (IS_IRONLAKE_M(i915))
		gen5_rps_init(rps);

	/* Derive initial user preferences/limits from the hardware limits */
	rps->max_freq_softlimit = rps->max_freq;
	rps->min_freq_softlimit = rps->min_freq;

	/* After setting max-softlimit, find the overclock max freq */
	if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
		u32 params = 0;

		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
				       &params, NULL);
		if (params & BIT(31)) { /* OC supported */
			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
					 (rps->max_freq & 0xff) * 50,
					 (params & 0xff) * 50);
			rps->max_freq = params & 0xff;
		}
	}

	/* Finally allow us to boost to max by default */
	rps->boost_freq = rps->max_freq;
	rps->idle_freq = rps->min_freq;
	rps->cur_freq = rps->idle_freq;

	rps->pm_intrmsk_mbz = 0;

	/*
	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
	 * if GEN6_PM_UP_EI_EXPIRED is masked.
	 *
	 * TODO: verify if this can be reproduced on VLV,CHV.
	 */
	if (INTEL_GEN(i915) <= 7)
		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;

	if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}
u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 cagf;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
		cagf = (rpstat >> 8) & 0xff;
	else if (INTEL_GEN(i915) >= 9)
		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
	else
		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;

	return cagf;
}

static u32 read_cagf(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 freq;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
		vlv_punit_get(i915);
		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
		vlv_punit_put(i915);
	} else {
		freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
	}

	return intel_rps_get_cagf(rps, freq);
}

u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
	struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
	intel_wakeref_t wakeref;
	u32 freq = 0;

	with_intel_runtime_pm_if_in_use(rpm, wakeref)
		freq = intel_gpu_freq(rps, read_cagf(rps));

	return freq;
}
/* External interface for intel_ips.ko */

static struct drm_i915_private __rcu *ips_mchdev;

/*
 * Tells the intel_ips driver that the i915 driver is now loaded, if
 * IPS got loaded first.
 *
 * This awkward dance is so that neither module has to depend on the
 * other in order for IPS to do the appropriate communication of
 * GPU turbo limits to i915.
 */
static void
ips_ping_for_i915_load(void)
{
	void (*link)(void);

	link = symbol_get(ips_link_to_i915_driver);
	if (link) {
		link();
		symbol_put(ips_link_to_i915_driver);
	}
}

void intel_rps_driver_register(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	/*
	 * We only register the i915 ips part with intel-ips once everything is
	 * set up, to avoid intel-ips sneaking in and reading bogus values.
	 */
	if (IS_GEN(gt->i915, 5)) {
		GEM_BUG_ON(ips_mchdev);
		rcu_assign_pointer(ips_mchdev, gt->i915);
		ips_ping_for_i915_load();
	}
}

void intel_rps_driver_unregister(struct intel_rps *rps)
{
	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
		rcu_assign_pointer(ips_mchdev, NULL);
}

static struct drm_i915_private *mchdev_get(void)
{
	struct drm_i915_private *i915;

	rcu_read_lock();
	i915 = rcu_dereference(ips_mchdev);
	if (!kref_get_unless_zero(&i915->drm.ref))
		i915 = NULL;
	rcu_read_unlock();

	return i915;
}
/**
 * i915_read_mch_val - return value for IPS use
 *
 * Calculate and return a value for the IPS driver to use when deciding whether
 * we have thermal and power headroom to increase CPU or GPU power budget.
 */
unsigned long i915_read_mch_val(void)
{
	struct drm_i915_private *i915;
	unsigned long chipset_val = 0;
	unsigned long graphics_val = 0;
	intel_wakeref_t wakeref;

	i915 = mchdev_get();
	if (!i915)
		return 0;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
		struct intel_ips *ips = &i915->gt.rps.ips;

		spin_lock_irq(&mchdev_lock);
		chipset_val = __ips_chipset_val(ips);
		graphics_val = __ips_gfx_val(ips);
		spin_unlock_irq(&mchdev_lock);
	}

	drm_dev_put(&i915->drm);
	return chipset_val + graphics_val;
}
EXPORT_SYMBOL_GPL(i915_read_mch_val);

/**
 * i915_gpu_raise - raise GPU frequency limit
 *
 * Raise the limit; IPS indicates we have thermal headroom.
 */
bool i915_gpu_raise(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &i915->gt.rps;

	spin_lock_irq(&mchdev_lock);
	if (rps->max_freq_softlimit < rps->max_freq)
		rps->max_freq_softlimit++;
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return true;
}
EXPORT_SYMBOL_GPL(i915_gpu_raise);

/**
 * i915_gpu_lower - lower GPU frequency limit
 *
 * IPS indicates we're close to a thermal limit, so throttle back the GPU
 * frequency maximum.
 */
bool i915_gpu_lower(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &i915->gt.rps;

	spin_lock_irq(&mchdev_lock);
	if (rps->max_freq_softlimit > rps->min_freq)
		rps->max_freq_softlimit--;
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return true;
}
EXPORT_SYMBOL_GPL(i915_gpu_lower);

/**
 * i915_gpu_busy - indicate GPU business to IPS
 *
 * Tell the IPS driver whether or not the GPU is busy.
 */
bool i915_gpu_busy(void)
{
	struct drm_i915_private *i915;
	bool ret;

	i915 = mchdev_get();
	if (!i915)
		return false;

	ret = i915->gt.awake;

	drm_dev_put(&i915->drm);
	return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_busy);

/**
 * i915_gpu_turbo_disable - disable graphics turbo
 *
 * Disable graphics turbo by resetting the max frequency and setting the
 * current frequency to the default.
 */
bool i915_gpu_turbo_disable(void)
{
	struct drm_i915_private *i915;
	struct intel_rps *rps;
	bool ret;

	i915 = mchdev_get();
	if (!i915)
		return false;

	rps = &i915->gt.rps;

	spin_lock_irq(&mchdev_lock);
	rps->max_freq_softlimit = rps->min_freq;
	ret = gen5_rps_set(&i915->gt.rps, rps->min_freq);
	spin_unlock_irq(&mchdev_lock);

	drm_dev_put(&i915->drm);
	return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);