upgpkg: sbcl 2.2.11-1
[arch-packages.git] / mesa / repos / staging-x86_64 / 0002-intel-fs-always-mask-the-bottom-bits-of-the-sampler-.patch
blob4e30757c09177602cea181164d960bb1dbcf4b2d
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
3 Date: Sat, 25 Jun 2022 23:38:45 +0300
4 Subject: [PATCH] intel/fs: always mask the bottom bits of the sampler extended
5 descriptor
7 Fixes a hang in Age Of Empire 4. The HW hangs with the sampler input
8 unit busy. Replaying on simulation showed the extended message length
9 in the extended descriptor is invalid. Since Anv ensures the input
10 is correct in anv_surface_state_to_handle(), the likely reason for
11 this issue is the use of VK_VALVE_mutable_descriptor_type and the
12 application leaving a previous value for a different descriptor type.
14 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
15 ---
16 src/intel/compiler/brw_fs.cpp | 2 +-
17 .../compiler/brw_lower_logical_sends.cpp | 20 +++++++++++++++----
18 2 files changed, 17 insertions(+), 5 deletions(-)
20 diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
21 index 0f282dcd345f..165d8b33b677 100644
22 --- a/src/intel/compiler/brw_fs.cpp
23 +++ b/src/intel/compiler/brw_fs.cpp
24 @@ -4421,7 +4421,7 @@ brw_fb_write_msg_control(const fs_inst *inst,
25 return mctl;
28 - /**
29 +/**
30 * Predicate the specified instruction on the sample mask.
32 void
33 diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
34 index 255ce7594811..e99c7b8ab844 100644
35 --- a/src/intel/compiler/brw_lower_logical_sends.cpp
36 +++ b/src/intel/compiler/brw_lower_logical_sends.cpp
37 @@ -1112,30 +1112,42 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op,
38 inst->src[1] = brw_imm_ud(0);
39 } else if (surface_handle.file != BAD_FILE) {
40 /* Bindless surface */
41 + const fs_builder ubld = bld.group(1, 0).exec_all();
42 assert(devinfo->ver >= 9);
43 inst->desc = brw_sampler_desc(devinfo,
44 GFX9_BTI_BINDLESS,
45 sampler.file == IMM ? sampler.ud % 16 : 0,
46 msg_type,
47 simd_mode,
48 0 /* return_format unused on gfx7+ */);
50 /* For bindless samplers, the entire address is included in the message
51 * header so we can leave the portion in the message descriptor 0.
53 if (sampler_handle.file != BAD_FILE || sampler.file == IMM) {
54 inst->src[0] = brw_imm_ud(0);
55 } else {
56 - const fs_builder ubld = bld.group(1, 0).exec_all();
57 fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
58 ubld.SHL(desc, sampler, brw_imm_ud(8));
59 inst->src[0] = desc;
62 - /* We assume that the driver provided the handle in the top 20 bits so
63 - * we can use the surface handle directly as the extended descriptor.
64 + /* We previously assumed that the driver provided the handle in the top
65 + * 20 bits (leaving the bottom 12 bits at 0). But with extensions like
66 + * VK_VALVE_mutable_descriptor_type, the application is more in control
67 + * of the content of VkDescriptors which is where we store
68 + * surface/sampler offsets. We experience GPU hangs because the
69 + * application left an invalid value in the descriptor (probably used
70 + * for another descriptor type than sampler) and the lower 12 bits of the
71 + * surface handle overlapping with the extended descriptor length make
72 + * the HW hang. The following AND() clears those bits and fixes a hang
73 + * in Age Of Empire 4.
75 - inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
76 + fs_reg ex_desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
77 + ubld.AND(ex_desc,
78 + retype(surface_handle, BRW_REGISTER_TYPE_UD),
79 + brw_imm_ud(INTEL_MASK(31, 12)));
80 + inst->src[1] = component(ex_desc, 0);
81 } else {
82 /* Immediate portion of the descriptor */
83 inst->desc = brw_sampler_desc(devinfo,