1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
3 Date: Mon, 3 Apr 2023 14:52:59 +0300
4 Subject: [PATCH] intel/fs: fix scheduling of HALT instructions
6 With the following test:
8 dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_out_of_bounds_load
13 ... <- no control flow
17 g4 = get_buffer_size g2
18 ... <- no control flow
20 g5 = send <surface>, g4
22 eliminate_find_live_channel will remove the fbl/broadcast because it
23 assumes lane0 is active at get_buffer_size:
26 ... <- no control flow
28 g4 = get_buffer_size g0
29 ... <- no control flow
31 g5 = send <surface>, g4
33 But then the instruction scheduler will move the get_buffer_size after the halt:
37 ... <- no control flow
40 g4 = get_buffer_size g0
41 g5 = send <surface>, g4
43 get_buffer_size pulls the surface index from lane0 in g0 which could
44 have been turned off by the halt and we end up accessing an invalid surface.
47 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
50 .../compiler/brw_schedule_instructions.cpp | 46 +++++++++++++++++++
51 1 file changed, 46 insertions(+)
53 diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
54 index 3286e3f83b96..43f63784b2e8 100644
55 --- a/src/intel/compiler/brw_schedule_instructions.cpp
56 +++ b/src/intel/compiler/brw_schedule_instructions.cpp
57 @@ -651,6 +651,7 @@ public:
58 ralloc_free(this->mem_ctx);
60 void add_barrier_deps(schedule_node *n);
61 + void add_cross_lane_deps(schedule_node *n);
62 void add_dep(schedule_node *before, schedule_node *after, int latency);
63 void add_dep(schedule_node *before, schedule_node *after);
65 @@ -1098,6 +1099,28 @@ is_scheduling_barrier(const backend_instruction *inst)
66 inst->has_side_effects();
70 +has_cross_lane_access(const fs_inst *inst)
72 + if (inst->opcode == SHADER_OPCODE_BROADCAST ||
73 + inst->opcode == SHADER_OPCODE_READ_SR_REG ||
74 + inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST ||
75 + inst->opcode == SHADER_OPCODE_SHUFFLE ||
76 + inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS ||
77 + inst->opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
78 + inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL)
81 + for (unsigned s = 0; s < inst->sources; s++) {
82 + if (inst->src[s].file == VGRF) {
83 + if (inst->src[s].stride == 0)
92 * Sometimes we really want this node to execute after everything that
93 * was before it and before everything that followed it. This adds
94 @@ -1128,6 +1151,25 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
99 + * Because some instructions like HALT can disable lanes, a HALT must not be
100 + * scheduled ahead of an earlier cross-lane access, otherwise we could end up
101 + * with later instructions accessing uninitialized data.
104 +instruction_scheduler::add_cross_lane_deps(schedule_node *n)
106 + schedule_node *prev = (schedule_node *)n->prev;
109 + while (!prev->is_head_sentinel()) {
110 + if (has_cross_lane_access((fs_inst *)prev->inst))
111 + add_dep(prev, n, 0);
112 + prev = (schedule_node *)prev->prev;
117 /* instruction scheduling needs to be aware of when an MRF write
118 * actually writes 2 MRFs.
120 @@ -1165,6 +1207,10 @@ fs_instruction_scheduler::calculate_deps()
121 if (is_scheduling_barrier(inst))
124 + if (inst->opcode == BRW_OPCODE_HALT ||
125 + inst->opcode == SHADER_OPCODE_HALT_TARGET)
126 + add_cross_lane_deps(n);
128 /* read-after-write deps. */
129 for (int i = 0; i < inst->sources; i++) {
130 if (inst->src[i].file == VGRF) {