/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ''''''
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */
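
/*
 * Editor's sketch, not from the original file: as a roadmap for the
 * context-workaround machinery below, intel_ctx_workarounds_init() fills
 * the dev_priv->workarounds table once per device, and
 * intel_ctx_workarounds_emit() later replays that table from a request via
 * a single MI_LOAD_REGISTER_IMM packet. Callers elsewhere in the driver
 * (not shown here) use them roughly as:
 *
 *	err = intel_ctx_workarounds_init(dev_priv);	// at device init
 *	...
 *	err = intel_ctx_workarounds_emit(rq);		// per new context
 */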

static void wa_add(struct drm_i915_private *i915,
		   i915_reg_t reg, const u32 mask, const u32 val)
{
	struct i915_workarounds *wa = &i915->workarounds;
	unsigned int start = 0, end = wa->count;
	unsigned int addr = i915_mmio_reg_offset(reg);
	struct i915_wa_reg *r;

	/* Binary search for an existing entry for this register. */
	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (wa->reg[mid].addr < addr) {
			start = mid + 1;
		} else if (wa->reg[mid].addr > addr) {
			end = mid;
		} else {
			/* Merge into the existing entry for this register. */
			r = &wa->reg[mid];

			if ((mask & ~r->mask) == 0) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
					  addr, r->mask, r->value);

				r->value &= ~mask;
			}

			r->mask |= mask;
			r->value |= val;
			return;
		}
	}

	if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
		DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
			  addr, mask, val);
		return;
	}

	r = &wa->reg[wa->count++];
	r->addr = addr;
	r->value = val;
	r->mask = mask;

	/* Bubble the new entry down to keep the table sorted by address. */
	while (r-- > wa->reg) {
		GEM_BUG_ON(r[0].addr == r[1].addr);
		if (r[1].addr > r[0].addr)
			break;

		swap(r[1], r[0]);
	}
}
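
/*
 * Usage sketch (illustrative only): two queued writes for the same register
 * merge into a single sorted table entry, with masks and values OR'ed
 * together, so the LRI list emits each register exactly once.
 * gen9_ctx_workarounds_init() below does exactly this for HDC_CHICKEN0:
 *
 *	wa_add(i915, HDC_CHICKEN0, m1, _MASKED_BIT_ENABLE(m1));
 *	wa_add(i915, HDC_CHICKEN0, m2, _MASKED_BIT_ENABLE(m2));
 *	// -> one entry: .mask = m1 | m2, .value covers both bits
 */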

#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))

#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
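
/*
 * Note (editor's assumption about the _MASKED_* helpers, which live in
 * i915_reg.h): masked registers carry a write-enable mask in their upper
 * 16 bits, so the helpers expand along the lines of
 *
 *	_MASKED_FIELD(mask, value)	-> ((mask) << 16) | (value)
 *	_MASKED_BIT_ENABLE(a)		-> _MASKED_FIELD((a), (a))
 *	_MASKED_BIT_DISABLE(a)		-> _MASKED_FIELD((a), 0)
 *
 * which lets a single 32-bit write update only the selected bits while
 * leaving the rest of the register untouched.
 */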

static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}

static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen8_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	return 0;
}

static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen8_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

	return 0;
}

static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	if (HAS_LLC(dev_priv)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs, so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(dev_priv) ||
	    IS_KABYLAKE(dev_priv) ||
	    IS_COFFEELAKE(dev_priv))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace
	 * is still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining the old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(dev_priv))
		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);

	return 0;
}

static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
{
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return 0;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));

	return 0;
}
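
/*
 * Worked example (editorial): if slice 0 is the only slice where exactly
 * one subslice has all 7 EUs enabled, say subslice_7eu[0] == BIT(2), then
 * ss == ffs(BIT(2)) - 1 == 2 and vals[0] == 3 - 2 == 1, so the call above
 * programs GEN9_IZ_HASHING(0, 1) and leaves slices 1 and 2 at zero.
 */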

static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	return skl_tune_iz_hashing(dev_priv);
}

static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}

static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FENCE_DEST_SLM_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	return 0;
}

static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}

static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	return 0;
}

static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl (pre-prod) */
	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);

	return 0;
}

static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* Wa_2006611047:icl (pre-prod)
	 * Formerly known as WaDisableImprovedTdlClkGating
	 */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

	/* WaEnableStateCacheRedirectToCS:icl */
	WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
			  GEN11_STATE_CACHE_REDIRECT_TO_CS);

	/* Wa_2006665173:icl (pre-prod) */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

	return 0;
}

int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int err = 0;

	dev_priv->workarounds.count = 0;

	if (INTEL_GEN(dev_priv) < 8)
		err = 0;
	else if (IS_BROADWELL(dev_priv))
		err = bdw_ctx_workarounds_init(dev_priv);
	else if (IS_CHERRYVIEW(dev_priv))
		err = chv_ctx_workarounds_init(dev_priv);
	else if (IS_SKYLAKE(dev_priv))
		err = skl_ctx_workarounds_init(dev_priv);
	else if (IS_BROXTON(dev_priv))
		err = bxt_ctx_workarounds_init(dev_priv);
	else if (IS_KABYLAKE(dev_priv))
		err = kbl_ctx_workarounds_init(dev_priv);
	else if (IS_GEMINILAKE(dev_priv))
		err = glk_ctx_workarounds_init(dev_priv);
	else if (IS_COFFEELAKE(dev_priv))
		err = cfl_ctx_workarounds_init(dev_priv);
	else if (IS_CANNONLAKE(dev_priv))
		err = cnl_ctx_workarounds_init(dev_priv);
	else if (IS_ICELAKE(dev_priv))
		err = icl_ctx_workarounds_init(dev_priv);
	else
		MISSING_CASE(INTEL_GEN(dev_priv));
	if (err)
		return err;

	DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
			 dev_priv->workarounds.count);

	return 0;
}

int intel_ctx_workarounds_emit(struct i915_request *rq)
{
	struct i915_workarounds *w = &rq->i915->workarounds;
	u32 *cs;
	int ret, i;

	if (w->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(rq, (w->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
	for (i = 0; i < w->count; i++) {
		*cs++ = w->reg[i].addr;
		*cs++ = w->reg[i].value;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}
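
/*
 * For reference, the command stream built above is w->count * 2 + 2 dwords
 * (hence the intel_ring_begin() size):
 *
 *	MI_LOAD_REGISTER_IMM(count)
 *	reg[0].addr, reg[0].value
 *	...
 *	reg[count - 1].addr, reg[count - 1].value
 *	MI_NOOP
 */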

static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
}

static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
}

static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
		   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
			   ECOCHK_DIS_TLB);

	if (HAS_LLC(dev_priv)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		I915_WRITE(MMCD_MISC_CTRL,
			   I915_READ(MMCD_MISC_CTRL) |
			   MMCD_PCLA |
			   MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   BDW_DISABLE_HDC_INVALIDATION);

	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
	if (IS_GEN9_LP(dev_priv)) {
		u32 val = I915_READ(GEN8_L3SQCREG1);

		val &= ~L3_PRIO_CREDITS_MASK;
		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
		I915_WRITE(GEN8_L3SQCREG1, val);
	}

	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
	I915_WRITE(GEN8_L3SQCREG4,
		   I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}

static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaEnableGapsTsvCreditFix:skl */
	I915_WRITE(GEN8_GARBCNTL,
		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

	/* WaDisableGafsUnitClkGating:skl */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
			   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaDisablePooledEuLoadBalancingFix:bxt */
	I915_WRITE(FF_SLICE_CS_CHICKEN2,
		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));

	/* WaInPlaceDecompressionHang:bxt */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaEnableGapsTsvCreditFix:kbl */
	I915_WRITE(GEN8_GARBCNTL,
		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		I915_WRITE(GAMT_CHKN_BIT_REG,
			   I915_READ(GAMT_CHKN_BIT_REG) |
			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
		struct intel_engine_cs *engine;
		unsigned int tmp;

		for_each_engine(engine, dev_priv, tmp) {
			if (engine->id == RCS)
				continue;

			I915_WRITE(RING_SEMA_WAIT_POLL(engine->mmio_base), 1);
		}
	}
}

static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);
}

static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaEnableGapsTsvCreditFix:cfl */
	I915_WRITE(GEN8_GARBCNTL,
		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

	/* WaDisableGafsUnitClkGating:cfl */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void wa_init_mcr(struct drm_i915_private *dev_priv)
{
	const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
	u32 mcr;
	u32 mcr_slice_subslice_mask;

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 * This might be incompatible with
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
	 * Fortunately, this should not happen in production hardware, so
	 * we only assert that this is the case (instead of implementing
	 * something more complex that requires checking the range of every
	 * MMIO read).
	 */
	if (INTEL_GEN(dev_priv) >= 10 &&
	    is_power_of_2(sseu->slice_mask)) {
		/*
		 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
		 * enabled subslice, no need to redirect MCR packet
		 */
		u32 slice = fls(sseu->slice_mask);
		u32 fuse3 = I915_READ(GEN10_MIRROR_FUSE3);
		u8 ss_mask = sseu->subslice_mask[slice];

		u8 enabled_mask = (ss_mask | ss_mask >>
				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

		/*
		 * Production silicon should have matched L3Bank and
		 * subslice enabled
		 */
		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
	}

	mcr = I915_READ(GEN8_MCR_SELECTOR);

	if (INTEL_GEN(dev_priv) >= 11)
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
	else
		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
					  GEN8_MCR_SUBSLICE_MASK;

	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. In the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	mcr &= ~mcr_slice_subslice_mask;
	mcr |= intel_calculate_mcr_s_ss_select(dev_priv);
	I915_WRITE(GEN8_MCR_SELECTOR, mcr);
}
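
/*
 * Illustrative note (editor's paraphrase, not verified against bspec): on a
 * gen11 part with only slice 0 enabled, intel_calculate_mcr_s_ss_select()
 * would yield a GEN11_MCR_SLICE(0) | GEN11_MCR_SUBSLICE(ss) style value for
 * some enabled subslice ss, so that subsequent reads in the 0xB100-0xB3FF
 * range are steered to live hardware rather than a fused-off unit.
 */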

static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	wa_init_mcr(dev_priv);

	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
		I915_WRITE(GAMT_CHKN_BIT_REG,
			   I915_READ(GAMT_CHKN_BIT_REG) |
			   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}

static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	wa_init_mcr(dev_priv);

	/* This is not a WA. Enable for better image quality */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));

	/* WaInPlaceDecompressionHang:icl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaPipelineFlushCoherentLines:icl */
	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
		   GEN8_LQSC_FLUSH_COHERENT_LINES);

	/* Formerly known as WaGAPZPriorityScheme */
	I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) |
		   GEN11_ARBITRATION_PRIO_ORDER_MASK);

	/* Formerly known as WaL3BankAddressHashing */
	I915_WRITE(GEN8_GARBCNTL,
		   (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) |
		   GEN11_HASH_CTRL_EXCL_BIT0);
	I915_WRITE(GEN11_GLBLINVL,
		   (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) |
		   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

	/* WaModifyGamTlbPartitioning:icl */
	I915_WRITE(GEN11_GACB_PERF_CTRL,
		   (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) |
		   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Formerly known as WaDisableCleanEvicts */
	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
		   GEN11_LQSC_CLEAN_EVICT_DISABLE);

	/* Formerly known as WaCL2SFHalfMaxAlloc */
	I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) |
		   GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		   GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Formerly known as WaDisCtxReload */
	I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) |
		   GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
		I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
			   I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
			   MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE,
		   I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) |
		   GWUNIT_CLKGATE_DIS);

	/* Wa_1604302699:icl */
	I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER,
		   I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) |
		   GEN11_I2M_WRITE_DISABLE);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
		I915_WRITE(INF_UNIT_LEVEL_CLKGATE,
			   I915_READ(INF_UNIT_LEVEL_CLKGATE) |
			   CGPSF_CLKGATE_DIS);

	/* WaForwardProgressSoftReset:icl */
	I915_WRITE(GEN10_SCRATCH_LNCF2,
		   I915_READ(GEN10_SCRATCH_LNCF2) |
		   PMFLUSHDONE_LNICRSDROP |
		   PMFLUSH_GAPL3UNBLOCK |
		   PMFLUSHDONE_LNEBLK);

	/* Formerly known as WaGamTlbPendError */
	I915_WRITE(GAMT_CHKN_BIT_REG,
		   I915_READ(GAMT_CHKN_BIT_REG) |
		   GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) < 8)
		return;
	else if (IS_BROADWELL(dev_priv))
		bdw_gt_workarounds_apply(dev_priv);
	else if (IS_CHERRYVIEW(dev_priv))
		chv_gt_workarounds_apply(dev_priv);
	else if (IS_SKYLAKE(dev_priv))
		skl_gt_workarounds_apply(dev_priv);
	else if (IS_BROXTON(dev_priv))
		bxt_gt_workarounds_apply(dev_priv);
	else if (IS_KABYLAKE(dev_priv))
		kbl_gt_workarounds_apply(dev_priv);
	else if (IS_GEMINILAKE(dev_priv))
		glk_gt_workarounds_apply(dev_priv);
	else if (IS_COFFEELAKE(dev_priv))
		cfl_gt_workarounds_apply(dev_priv);
	else if (IS_CANNONLAKE(dev_priv))
		cnl_gt_workarounds_apply(dev_priv);
	else if (IS_ICELAKE(dev_priv))
		icl_gt_workarounds_apply(dev_priv);
	else
		MISSING_CASE(INTEL_GEN(dev_priv));
}

struct whitelist {
	i915_reg_t reg[RING_MAX_NONPRIV_SLOTS];
	unsigned int count;
	u32 nopid;
};

static void whitelist_reg(struct whitelist *w, i915_reg_t reg)
{
	if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	w->reg[w->count++] = reg;
}

static void bdw_whitelist_build(struct whitelist *w)
{
}

static void chv_whitelist_build(struct whitelist *w)
{
}

static void gen9_whitelist_build(struct whitelist *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);
}

static void kbl_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);
}

static void cnl_whitelist_build(struct whitelist *w)
{
	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct whitelist *w)
{
}

static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
					 struct whitelist *w)
{
	struct drm_i915_private *i915 = engine->i915;

	GEM_BUG_ON(engine->id != RCS);

	w->count = 0;
	w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base));

	if (INTEL_GEN(i915) < 8)
		return NULL;
	else if (IS_BROADWELL(i915))
		bdw_whitelist_build(w);
	else if (IS_CHERRYVIEW(i915))
		chv_whitelist_build(w);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(w);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(w);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(w);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(w);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(w);
	else if (IS_CANNONLAKE(i915))
		cnl_whitelist_build(w);
	else if (IS_ICELAKE(i915))
		icl_whitelist_build(w);
	else
		MISSING_CASE(INTEL_GEN(i915));

	return w;
}

static void whitelist_apply(struct intel_engine_cs *engine,
			    const struct whitelist *w)
{
	struct drm_i915_private *dev_priv = engine->i915;
	const u32 base = engine->mmio_base;
	unsigned int i;

	if (!w)
		return;

	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);

	for (i = 0; i < w->count; i++)
		I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i),
			      i915_mmio_reg_offset(w->reg[i]));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid);

	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
}
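
/*
 * Design note (editorial): each RING_FORCE_TO_NONPRIV slot holds the mmio
 * offset of one register that unprivileged batches may then access; parking
 * the unused slots at the ring's NOPID offset keeps them pointing at a
 * harmless register rather than whatever stale offset was left behind.
 */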

void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
{
	struct whitelist w;

	whitelist_apply(engine, whitelist_build(engine, &w));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_workarounds.c"
#endif