archrelease: copy trunk to extra-x86_64
[arch-packages.git] / mesa / repos / extra-x86_64 / 0001-intel-fs-fix-scheduling-of-HALT-instructions.patch
blobd8b44a868dc392d0cf9d1806b11ad4e9a581b9f8
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
3 Date: Mon, 3 Apr 2023 14:52:59 +0300
4 Subject: [PATCH] intel/fs: fix scheduling of HALT instructions
6 With the following test:
8 dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_out_of_bounds_load
10 There is a shader of the following form:
12 shader_start:
13 ... <- no control flow
14 g0 = some_alu
15 g1 = fbl
16 g2 = broadcast g3, g1
17 g4 = get_buffer_size g2
18 ... <- no control flow
19 halt <- on some lanes
20 g5 = send <surface>, g4
22 eliminate_find_live_channel will remove the fbl/broadcast because it
23 assumes lane0 is active at get_buffer_size:
25 shader_start:
26 ... <- no control flow
27 g0 = some_alu
28 g4 = get_buffer_size g0
29 ... <- no control flow
30 halt <- on some lanes
31 g5 = send <surface>, g4
33 But then the instruction scheduler will move the get_buffer_size after
34 the halt:
36 shader_start:
37 ... <- no control flow
38 halt <- on some lanes
39 g0 = some_alu
40 g4 = get_buffer_size g0
41 g5 = send <surface>, g4
43 get_buffer_size pulls the surface index from lane0 of g0. That lane could
44 have been turned off by the halt, so we end up accessing an invalid
45 surface handle.
47 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
48 Cc: mesa-stable
49 ---
50 .../compiler/brw_schedule_instructions.cpp | 46 +++++++++++++++++++
51 1 file changed, 46 insertions(+)
53 diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
54 index 3286e3f83b96..43f63784b2e8 100644
55 --- a/src/intel/compiler/brw_schedule_instructions.cpp
56 +++ b/src/intel/compiler/brw_schedule_instructions.cpp
57 @@ -651,6 +651,7 @@ public:
58 ralloc_free(this->mem_ctx);
60 void add_barrier_deps(schedule_node *n);
61 + void add_cross_lane_deps(schedule_node *n);
62 void add_dep(schedule_node *before, schedule_node *after, int latency);
63 void add_dep(schedule_node *before, schedule_node *after);
65 @@ -1098,6 +1099,28 @@ is_scheduling_barrier(const backend_instruction *inst)
66 inst->has_side_effects();
69 +static bool
70 +has_cross_lane_access(const fs_inst *inst)
72 + if (inst->opcode == SHADER_OPCODE_BROADCAST ||
73 + inst->opcode == SHADER_OPCODE_READ_SR_REG ||
74 + inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST ||
75 + inst->opcode == SHADER_OPCODE_SHUFFLE ||
76 + inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS ||
77 + inst->opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
78 + inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL)
79 + return true;
81 + for (unsigned s = 0; s < inst->sources; s++) {
82 + if (inst->src[s].file == VGRF) {
83 + if (inst->src[s].stride == 0)
84 + return true;
85 + }
86 + }
88 + return false;
91 /**
92 * Sometimes we really want this node to execute after everything that
93 * was before it and before everything that followed it. This adds
94 @@ -1128,6 +1151,25 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
98 +/**
99 + * Because some instructions like HALT can disable lanes, they must not be
100 + * scheduled before a preceding cross lane access, otherwise that access
101 + * could end up reading data from lanes that are no longer active.
102 + */
103 +void
104 +instruction_scheduler::add_cross_lane_deps(schedule_node *n)
106 + schedule_node *prev = (schedule_node *)n->prev;
108 + if (prev) {
109 + while (!prev->is_head_sentinel()) {
110 + if (has_cross_lane_access((fs_inst *)prev->inst))
111 + add_dep(prev, n, 0);
112 + prev = (schedule_node *)prev->prev;
117 /* instruction scheduling needs to be aware of when an MRF write
118 * actually writes 2 MRFs.
120 @@ -1165,6 +1207,10 @@ fs_instruction_scheduler::calculate_deps()
121 if (is_scheduling_barrier(inst))
122 add_barrier_deps(n);
124 + if (inst->opcode == BRW_OPCODE_HALT ||
125 + inst->opcode == SHADER_OPCODE_HALT_TARGET)
126 + add_cross_lane_deps(n);
128 /* read-after-write deps. */
129 for (int i = 0; i < inst->sources; i++) {
130 if (inst->src[i].file == VGRF) {