1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; Pixel-shader entry point that calls llvm.amdgcn.kill with the constant
; condition `true`. Checked under the common GCN prefix shared by all RUN lines.
7 define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
8 ; GCN-LABEL: test_kill_depth_0_imm_pos:
11 call void @llvm.amdgcn.kill(i1 true)
; Calls llvm.amdgcn.kill with the constant condition `false`.
; The expected code clears exec against itself (s_andn2 / s_and_not1 with
; exec, exec, exec) and, via s_cbranch_scc0, branches to a block that zeroes
; exec, issues a `done` export (null target; mrt0 on GFX11) and ends the program.
15 define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
16 ; WAVE64-LABEL: test_kill_depth_0_imm_neg:
18 ; WAVE64-NEXT: s_andn2_b64 exec, exec, exec
19 ; WAVE64-NEXT: s_cbranch_scc0 .LBB1_1
20 ; WAVE64-NEXT: s_endpgm
21 ; WAVE64-NEXT: .LBB1_1:
22 ; WAVE64-NEXT: s_mov_b64 exec, 0
23 ; WAVE64-NEXT: exp null off, off, off, off done vm
24 ; WAVE64-NEXT: s_endpgm
26 ; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
27 ; GFX10-WAVE32: ; %bb.0:
28 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, exec_lo
29 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB1_1
30 ; GFX10-WAVE32-NEXT: s_endpgm
31 ; GFX10-WAVE32-NEXT: .LBB1_1:
32 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
33 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
34 ; GFX10-WAVE32-NEXT: s_endpgm
36 ; GFX11-LABEL: test_kill_depth_0_imm_neg:
38 ; GFX11-NEXT: s_and_not1_b64 exec, exec, exec
39 ; GFX11-NEXT: s_cbranch_scc0 .LBB1_1
40 ; GFX11-NEXT: s_endpgm
41 ; GFX11-NEXT: .LBB1_1:
42 ; GFX11-NEXT: s_mov_b64 exec, 0
43 ; GFX11-NEXT: exp mrt0 off, off, off, off done
44 ; GFX11-NEXT: s_endpgm
45 call void @llvm.amdgcn.kill(i1 false)
; Two back-to-back llvm.amdgcn.kill(i1 false) calls in a single block.
; The expected code keeps a copy of exec in an SGPR (s[0:1] / s0) and emits one
; s_andn2 + s_cbranch_scc0 sequence per call; both branches target the same
; .LBB2_2 block.
49 ; FIXME: Ideally only one early-exit would be emitted
50 define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
51 ; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
53 ; WAVE64-NEXT: s_mov_b64 s[0:1], exec
54 ; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
55 ; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
56 ; WAVE64-NEXT: ; %bb.1:
57 ; WAVE64-NEXT: s_mov_b64 exec, 0
58 ; WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
59 ; WAVE64-NEXT: s_cbranch_scc0 .LBB2_2
60 ; WAVE64-NEXT: s_endpgm
61 ; WAVE64-NEXT: .LBB2_2:
62 ; WAVE64-NEXT: s_mov_b64 exec, 0
63 ; WAVE64-NEXT: exp null off, off, off, off done vm
64 ; WAVE64-NEXT: s_endpgm
66 ; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
67 ; GFX10-WAVE32: ; %bb.0:
68 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
69 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
70 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
71 ; GFX10-WAVE32-NEXT: ; %bb.1:
72 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
73 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
74 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB2_2
75 ; GFX10-WAVE32-NEXT: s_endpgm
76 ; GFX10-WAVE32-NEXT: .LBB2_2:
77 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
78 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
79 ; GFX10-WAVE32-NEXT: s_endpgm
81 ; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
83 ; GFX11-NEXT: s_mov_b64 s[0:1], exec
84 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
85 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
86 ; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
87 ; GFX11-NEXT: ; %bb.1:
88 ; GFX11-NEXT: s_mov_b64 exec, 0
89 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
90 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
91 ; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
92 ; GFX11-NEXT: s_endpgm
93 ; GFX11-NEXT: .LBB2_2:
94 ; GFX11-NEXT: s_mov_b64 exec, 0
95 ; GFX11-NEXT: exp mrt0 off, off, off, off done
96 ; GFX11-NEXT: s_endpgm
97 call void @llvm.amdgcn.kill(i1 false)
98 call void @llvm.amdgcn.kill(i1 false)
; Passes a per-lane condition, `fcmp olt float %x, 0.0`, to llvm.amdgcn.kill.
; The expected code computes the inverse compare (v_cmp_ngt_f32 0, v0) into vcc
; and removes those lanes from exec with s_andn2 / s_and_not1, followed by the
; s_cbranch_scc0 early-exit sequence.
102 define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
103 ; WAVE64-LABEL: test_kill_depth_var:
105 ; WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
106 ; WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
107 ; WAVE64-NEXT: s_cbranch_scc0 .LBB3_1
108 ; WAVE64-NEXT: s_endpgm
109 ; WAVE64-NEXT: .LBB3_1:
110 ; WAVE64-NEXT: s_mov_b64 exec, 0
111 ; WAVE64-NEXT: exp null off, off, off, off done vm
112 ; WAVE64-NEXT: s_endpgm
114 ; GFX10-WAVE32-LABEL: test_kill_depth_var:
115 ; GFX10-WAVE32: ; %bb.0:
116 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
117 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
118 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB3_1
119 ; GFX10-WAVE32-NEXT: s_endpgm
120 ; GFX10-WAVE32-NEXT: .LBB3_1:
121 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
122 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
123 ; GFX10-WAVE32-NEXT: s_endpgm
125 ; GFX11-LABEL: test_kill_depth_var:
127 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
128 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
129 ; GFX11-NEXT: s_cbranch_scc0 .LBB3_1
130 ; GFX11-NEXT: s_endpgm
131 ; GFX11-NEXT: .LBB3_1:
132 ; GFX11-NEXT: s_mov_b64 exec, 0
133 ; GFX11-NEXT: exp mrt0 off, off, off, off done
134 ; GFX11-NEXT: s_endpgm
135 %cmp = fcmp olt float %x, 0.0
136 call void @llvm.amdgcn.kill(i1 %cmp)
; Calls llvm.amdgcn.kill twice with the same condition value %cmp
; (`fcmp olt float %x, 0.0`). The expected code repeats the v_cmp_ngt_f32 on v0
; and the s_andn2 + s_cbranch_scc0 sequence for each call, both branching to
; .LBB4_2.
140 ; FIXME: Ideally only one early-exit would be emitted
141 define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
142 ; SI-LABEL: test_kill_depth_var_x2_same:
144 ; SI-NEXT: s_mov_b64 s[0:1], exec
145 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
146 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
147 ; SI-NEXT: s_cbranch_scc0 .LBB4_2
149 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
150 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
151 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
152 ; SI-NEXT: s_cbranch_scc0 .LBB4_2
155 ; SI-NEXT: s_mov_b64 exec, 0
156 ; SI-NEXT: exp null off, off, off, off done vm
159 ; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
160 ; GFX10-WAVE64: ; %bb.0:
161 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
162 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
163 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
164 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
165 ; GFX10-WAVE64-NEXT: ; %bb.1:
166 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
167 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
168 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
169 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB4_2
170 ; GFX10-WAVE64-NEXT: s_endpgm
171 ; GFX10-WAVE64-NEXT: .LBB4_2:
172 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
173 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
174 ; GFX10-WAVE64-NEXT: s_endpgm
176 ; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
177 ; GFX10-WAVE32: ; %bb.0:
178 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
179 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
180 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
181 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
182 ; GFX10-WAVE32-NEXT: ; %bb.1:
183 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
184 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
185 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
186 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB4_2
187 ; GFX10-WAVE32-NEXT: s_endpgm
188 ; GFX10-WAVE32-NEXT: .LBB4_2:
189 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
190 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
191 ; GFX10-WAVE32-NEXT: s_endpgm
193 ; GFX11-LABEL: test_kill_depth_var_x2_same:
195 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
196 ; GFX11-NEXT: s_mov_b64 s[0:1], exec
197 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
198 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
199 ; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
200 ; GFX11-NEXT: ; %bb.1:
201 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
202 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
203 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
204 ; GFX11-NEXT: s_cbranch_scc0 .LBB4_2
205 ; GFX11-NEXT: s_endpgm
206 ; GFX11-NEXT: .LBB4_2:
207 ; GFX11-NEXT: s_mov_b64 exec, 0
208 ; GFX11-NEXT: exp mrt0 off, off, off, off done
209 ; GFX11-NEXT: s_endpgm
210 %cmp = fcmp olt float %x, 0.0
211 call void @llvm.amdgcn.kill(i1 %cmp)
212 call void @llvm.amdgcn.kill(i1 %cmp)
; Calls llvm.amdgcn.kill twice with independent conditions: first
; `fcmp olt %x, 0.0`, then `fcmp olt %y, 0.0`. The expected code compares v0
; for the first kill and v1 for the second, with one s_cbranch_scc0 to
; .LBB5_2 after each.
216 ; FIXME: Ideally only one early-exit would be emitted
217 define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
218 ; SI-LABEL: test_kill_depth_var_x2:
220 ; SI-NEXT: s_mov_b64 s[0:1], exec
221 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
222 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
223 ; SI-NEXT: s_cbranch_scc0 .LBB5_2
225 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
226 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
227 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
228 ; SI-NEXT: s_cbranch_scc0 .LBB5_2
231 ; SI-NEXT: s_mov_b64 exec, 0
232 ; SI-NEXT: exp null off, off, off, off done vm
235 ; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
236 ; GFX10-WAVE64: ; %bb.0:
237 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
238 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
239 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
240 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
241 ; GFX10-WAVE64-NEXT: ; %bb.1:
242 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
243 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
244 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
245 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB5_2
246 ; GFX10-WAVE64-NEXT: s_endpgm
247 ; GFX10-WAVE64-NEXT: .LBB5_2:
248 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
249 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
250 ; GFX10-WAVE64-NEXT: s_endpgm
252 ; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
253 ; GFX10-WAVE32: ; %bb.0:
254 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
255 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
256 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
257 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
258 ; GFX10-WAVE32-NEXT: ; %bb.1:
259 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
260 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
261 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
262 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB5_2
263 ; GFX10-WAVE32-NEXT: s_endpgm
264 ; GFX10-WAVE32-NEXT: .LBB5_2:
265 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
266 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
267 ; GFX10-WAVE32-NEXT: s_endpgm
269 ; GFX11-LABEL: test_kill_depth_var_x2:
271 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
272 ; GFX11-NEXT: s_mov_b64 s[0:1], exec
273 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
274 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
275 ; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
276 ; GFX11-NEXT: ; %bb.1:
277 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
278 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
279 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
280 ; GFX11-NEXT: s_cbranch_scc0 .LBB5_2
281 ; GFX11-NEXT: s_endpgm
282 ; GFX11-NEXT: .LBB5_2:
283 ; GFX11-NEXT: s_mov_b64 exec, 0
284 ; GFX11-NEXT: exp mrt0 off, off, off, off done
285 ; GFX11-NEXT: s_endpgm
286 %cmp.x = fcmp olt float %x, 0.0
287 call void @llvm.amdgcn.kill(i1 %cmp.x)
288 %cmp.y = fcmp olt float %y, 0.0
289 call void @llvm.amdgcn.kill(i1 %cmp.y)
; Two llvm.amdgcn.kill calls separated by a side-effecting inline asm
; statement. The first kill uses `fcmp olt %x, 0.0`; the second uses the value
; the inline asm produces in v7 ("={v7}" constraint). The expected code places
; the asm block between the two s_andn2 + s_cbranch_scc0 sequences.
293 define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
294 ; SI-LABEL: test_kill_depth_var_x2_instructions:
296 ; SI-NEXT: s_mov_b64 s[0:1], exec
297 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
298 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
299 ; SI-NEXT: s_cbranch_scc0 .LBB6_2
301 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
302 ; SI-NEXT: ;;#ASMSTART
303 ; SI-NEXT: v_mov_b32_e64 v7, -1
305 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
306 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
307 ; SI-NEXT: s_cbranch_scc0 .LBB6_2
310 ; SI-NEXT: s_mov_b64 exec, 0
311 ; SI-NEXT: exp null off, off, off, off done vm
314 ; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
315 ; GFX10-WAVE64: ; %bb.0:
316 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
317 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
318 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
319 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
320 ; GFX10-WAVE64-NEXT: ; %bb.1:
321 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
322 ; GFX10-WAVE64-NEXT: ;;#ASMSTART
323 ; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
324 ; GFX10-WAVE64-NEXT: ;;#ASMEND
325 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
326 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
327 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB6_2
328 ; GFX10-WAVE64-NEXT: s_endpgm
329 ; GFX10-WAVE64-NEXT: .LBB6_2:
330 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
331 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
332 ; GFX10-WAVE64-NEXT: s_endpgm
334 ; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
335 ; GFX10-WAVE32: ; %bb.0:
336 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
337 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
338 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
339 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
340 ; GFX10-WAVE32-NEXT: ; %bb.1:
341 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
342 ; GFX10-WAVE32-NEXT: ;;#ASMSTART
343 ; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
344 ; GFX10-WAVE32-NEXT: ;;#ASMEND
345 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
346 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
347 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB6_2
348 ; GFX10-WAVE32-NEXT: s_endpgm
349 ; GFX10-WAVE32-NEXT: .LBB6_2:
350 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
351 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
352 ; GFX10-WAVE32-NEXT: s_endpgm
354 ; GFX11-LABEL: test_kill_depth_var_x2_instructions:
356 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
357 ; GFX11-NEXT: s_mov_b64 s[0:1], exec
358 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
359 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
360 ; GFX11-NEXT: s_cbranch_scc0 .LBB6_2
361 ; GFX11-NEXT: ; %bb.1:
362 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
363 ; GFX11-NEXT: ;;#ASMSTART
364 ; GFX11-NEXT: v_mov_b32_e64 v7, -1
365 ; GFX11-NEXT: ;;#ASMEND
366 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
367 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
368 ; GFX11-NEXT: s_cbranch_scc0 .LBB6_2
369 ; GFX11-NEXT: s_endpgm
370 ; GFX11-NEXT: .LBB6_2:
371 ; GFX11-NEXT: s_mov_b64 exec, 0
372 ; GFX11-NEXT: exp mrt0 off, off, off, off done
373 ; GFX11-NEXT: s_endpgm
374 %cmp.x = fcmp olt float %x, 0.0
375 call void @llvm.amdgcn.kill(i1 %cmp.x)
376 %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
377 %cmp.y = fcmp olt float %y, 0.0
378 call void @llvm.amdgcn.kill(i1 %cmp.y)
; Kill inside a conditionally executed block. The function branches on the
; inreg (SGPR) argument `%arg == 0`; in %bb a multi-line inline asm defines a
; value in v7 and `fcmp olt %var, 0.0` is passed to llvm.amdgcn.kill.
; The expected code tests s0 with a scalar compare and, in %bb, emits the
; v_cmp_ngt_f32 + s_andn2 + s_cbranch_scc0 early-exit sequence after the asm.
382 ; FIXME: why does the skip depend on the asm length in the same block?
383 define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
384 ; SI-LABEL: test_kill_control_flow:
385 ; SI: ; %bb.0: ; %entry
386 ; SI-NEXT: s_cmp_lg_u32 s0, 0
387 ; SI-NEXT: s_cbranch_scc0 .LBB7_2
388 ; SI-NEXT: ; %bb.1: ; %exit
389 ; SI-NEXT: v_mov_b32_e32 v0, 1.0
390 ; SI-NEXT: s_branch .LBB7_5
391 ; SI-NEXT: .LBB7_2: ; %bb
392 ; SI-NEXT: s_mov_b64 s[2:3], exec
393 ; SI-NEXT: ;;#ASMSTART
394 ; SI-NEXT: v_mov_b32_e64 v7, -1
406 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
407 ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
408 ; SI-NEXT: s_cbranch_scc0 .LBB7_4
409 ; SI-NEXT: ; %bb.3: ; %bb
410 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
411 ; SI-NEXT: v_mov_b32_e32 v0, 1.0
412 ; SI-NEXT: s_branch .LBB7_5
414 ; SI-NEXT: s_mov_b64 exec, 0
415 ; SI-NEXT: exp null off, off, off, off done vm
419 ; GFX10-WAVE64-LABEL: test_kill_control_flow:
420 ; GFX10-WAVE64: ; %bb.0: ; %entry
421 ; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
422 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_2
423 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
424 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
425 ; GFX10-WAVE64-NEXT: s_branch .LBB7_5
426 ; GFX10-WAVE64-NEXT: .LBB7_2: ; %bb
427 ; GFX10-WAVE64-NEXT: ;;#ASMSTART
428 ; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
429 ; GFX10-WAVE64-NEXT: v_nop_e64
430 ; GFX10-WAVE64-NEXT: v_nop_e64
431 ; GFX10-WAVE64-NEXT: v_nop_e64
432 ; GFX10-WAVE64-NEXT: v_nop_e64
433 ; GFX10-WAVE64-NEXT: v_nop_e64
434 ; GFX10-WAVE64-NEXT: v_nop_e64
435 ; GFX10-WAVE64-NEXT: v_nop_e64
436 ; GFX10-WAVE64-NEXT: v_nop_e64
437 ; GFX10-WAVE64-NEXT: v_nop_e64
438 ; GFX10-WAVE64-NEXT: v_nop_e64
439 ; GFX10-WAVE64-NEXT: ;;#ASMEND
440 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
441 ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
442 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
443 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB7_4
444 ; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
445 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
446 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 1.0
447 ; GFX10-WAVE64-NEXT: s_branch .LBB7_5
448 ; GFX10-WAVE64-NEXT: .LBB7_4:
449 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
450 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
451 ; GFX10-WAVE64-NEXT: s_endpgm
452 ; GFX10-WAVE64-NEXT: .LBB7_5:
454 ; GFX10-WAVE32-LABEL: test_kill_control_flow:
455 ; GFX10-WAVE32: ; %bb.0: ; %entry
456 ; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
457 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_2
458 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
459 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
460 ; GFX10-WAVE32-NEXT: s_branch .LBB7_5
461 ; GFX10-WAVE32-NEXT: .LBB7_2: ; %bb
462 ; GFX10-WAVE32-NEXT: ;;#ASMSTART
463 ; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
464 ; GFX10-WAVE32-NEXT: v_nop_e64
465 ; GFX10-WAVE32-NEXT: v_nop_e64
466 ; GFX10-WAVE32-NEXT: v_nop_e64
467 ; GFX10-WAVE32-NEXT: v_nop_e64
468 ; GFX10-WAVE32-NEXT: v_nop_e64
469 ; GFX10-WAVE32-NEXT: v_nop_e64
470 ; GFX10-WAVE32-NEXT: v_nop_e64
471 ; GFX10-WAVE32-NEXT: v_nop_e64
472 ; GFX10-WAVE32-NEXT: v_nop_e64
473 ; GFX10-WAVE32-NEXT: v_nop_e64
474 ; GFX10-WAVE32-NEXT: ;;#ASMEND
475 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
476 ; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
477 ; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
478 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB7_4
479 ; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
480 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
481 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 1.0
482 ; GFX10-WAVE32-NEXT: s_branch .LBB7_5
483 ; GFX10-WAVE32-NEXT: .LBB7_4:
484 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
485 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
486 ; GFX10-WAVE32-NEXT: s_endpgm
487 ; GFX10-WAVE32-NEXT: .LBB7_5:
489 ; GFX11-LABEL: test_kill_control_flow:
490 ; GFX11: ; %bb.0: ; %entry
491 ; GFX11-NEXT: s_cmp_lg_u32 s0, 0
492 ; GFX11-NEXT: s_cbranch_scc0 .LBB7_2
493 ; GFX11-NEXT: ; %bb.1: ; %exit
494 ; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
495 ; GFX11-NEXT: s_branch .LBB7_5
496 ; GFX11-NEXT: .LBB7_2: ; %bb
497 ; GFX11-NEXT: ;;#ASMSTART
498 ; GFX11-NEXT: v_mov_b32_e64 v7, -1
499 ; GFX11-NEXT: v_nop_e64
500 ; GFX11-NEXT: v_nop_e64
501 ; GFX11-NEXT: v_nop_e64
502 ; GFX11-NEXT: v_nop_e64
503 ; GFX11-NEXT: v_nop_e64
504 ; GFX11-NEXT: v_nop_e64
505 ; GFX11-NEXT: v_nop_e64
506 ; GFX11-NEXT: v_nop_e64
507 ; GFX11-NEXT: v_nop_e64
508 ; GFX11-NEXT: v_nop_e64
509 ; GFX11-NEXT: ;;#ASMEND
510 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
511 ; GFX11-NEXT: s_mov_b64 s[2:3], exec
512 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
513 ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
514 ; GFX11-NEXT: s_cbranch_scc0 .LBB7_4
515 ; GFX11-NEXT: ; %bb.3: ; %bb
516 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
517 ; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
518 ; GFX11-NEXT: s_branch .LBB7_5
519 ; GFX11-NEXT: .LBB7_4:
520 ; GFX11-NEXT: s_mov_b64 exec, 0
521 ; GFX11-NEXT: exp mrt0 off, off, off, off done
522 ; GFX11-NEXT: s_endpgm
523 ; GFX11-NEXT: .LBB7_5:
525 %cmp = icmp eq i32 %arg, 0
526 br i1 %cmp, label %bb, label %exit
529 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
539 v_nop_e64", "={v7}"()
540 %cmp.var = fcmp olt float %var, 0.0
541 ; TODO: We could do an early-exit here (the branch above is uniform!)
542 call void @llvm.amdgcn.kill(i1 %cmp.var)
; Kill in the middle of a conditionally executed block, with work remaining
; after it. In %bb, %live.across (asm result in v8) is defined before the kill
; and stored with a volatile store after it; %live.out (asm result in v9) is
; defined after the kill and reaches the final store through the %phi in the
; exit path. The expected code keeps both stores after the s_cbranch_scc0
; early-exit check.
549 define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
550 ; SI-LABEL: test_kill_control_flow_remainder:
551 ; SI: ; %bb.0: ; %entry
552 ; SI-NEXT: s_cmp_lg_u32 s0, 0
553 ; SI-NEXT: v_mov_b32_e32 v9, 0
554 ; SI-NEXT: s_cbranch_scc1 .LBB8_3
555 ; SI-NEXT: ; %bb.1: ; %bb
556 ; SI-NEXT: s_mov_b64 s[2:3], exec
557 ; SI-NEXT: ;;#ASMSTART
558 ; SI-NEXT: v_mov_b32_e64 v7, -1
571 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
572 ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
573 ; SI-NEXT: ;;#ASMSTART
574 ; SI-NEXT: v_mov_b32_e64 v8, -1
576 ; SI-NEXT: s_cbranch_scc0 .LBB8_4
577 ; SI-NEXT: ; %bb.2: ; %bb
578 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
579 ; SI-NEXT: s_mov_b32 s3, 0xf000
580 ; SI-NEXT: s_mov_b32 s2, -1
581 ; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0
582 ; SI-NEXT: s_waitcnt vmcnt(0)
583 ; SI-NEXT: ;;#ASMSTART
584 ; SI-NEXT: v_mov_b32_e64 v9, -2
586 ; SI-NEXT: .LBB8_3: ; %exit
587 ; SI-NEXT: s_mov_b32 s3, 0xf000
588 ; SI-NEXT: s_mov_b32 s2, -1
589 ; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0
592 ; SI-NEXT: s_mov_b64 exec, 0
593 ; SI-NEXT: exp null off, off, off, off done vm
596 ; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
597 ; GFX10-WAVE64: ; %bb.0: ; %entry
598 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v9, 0
599 ; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
600 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_2
601 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %exit
602 ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
603 ; GFX10-WAVE64-NEXT: s_endpgm
604 ; GFX10-WAVE64-NEXT: .LBB8_2: ; %bb
605 ; GFX10-WAVE64-NEXT: ;;#ASMSTART
606 ; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
607 ; GFX10-WAVE64-NEXT: v_nop_e64
608 ; GFX10-WAVE64-NEXT: v_nop_e64
609 ; GFX10-WAVE64-NEXT: v_nop_e64
610 ; GFX10-WAVE64-NEXT: v_nop_e64
611 ; GFX10-WAVE64-NEXT: v_nop_e64
612 ; GFX10-WAVE64-NEXT: v_nop_e64
613 ; GFX10-WAVE64-NEXT: v_nop_e64
614 ; GFX10-WAVE64-NEXT: v_nop_e64
615 ; GFX10-WAVE64-NEXT: v_nop_e64
616 ; GFX10-WAVE64-NEXT: v_nop_e64
617 ; GFX10-WAVE64-NEXT: v_nop_e64
618 ; GFX10-WAVE64-NEXT: ;;#ASMEND
619 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
620 ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
621 ; GFX10-WAVE64-NEXT: ;;#ASMSTART
622 ; GFX10-WAVE64-NEXT: v_mov_b32_e64 v8, -1
623 ; GFX10-WAVE64-NEXT: ;;#ASMEND
624 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
625 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB8_4
626 ; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb
627 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
628 ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v8, off
629 ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
630 ; GFX10-WAVE64-NEXT: ;;#ASMSTART
631 ; GFX10-WAVE64-NEXT: v_mov_b32_e64 v9, -2
632 ; GFX10-WAVE64-NEXT: ;;#ASMEND
633 ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v9, off
634 ; GFX10-WAVE64-NEXT: s_endpgm
635 ; GFX10-WAVE64-NEXT: .LBB8_4:
636 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
637 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
638 ; GFX10-WAVE64-NEXT: s_endpgm
640 ; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
641 ; GFX10-WAVE32: ; %bb.0: ; %entry
642 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v9, 0
643 ; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
644 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_2
645 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %exit
646 ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
647 ; GFX10-WAVE32-NEXT: s_endpgm
648 ; GFX10-WAVE32-NEXT: .LBB8_2: ; %bb
649 ; GFX10-WAVE32-NEXT: ;;#ASMSTART
650 ; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
651 ; GFX10-WAVE32-NEXT: v_nop_e64
652 ; GFX10-WAVE32-NEXT: v_nop_e64
653 ; GFX10-WAVE32-NEXT: v_nop_e64
654 ; GFX10-WAVE32-NEXT: v_nop_e64
655 ; GFX10-WAVE32-NEXT: v_nop_e64
656 ; GFX10-WAVE32-NEXT: v_nop_e64
657 ; GFX10-WAVE32-NEXT: v_nop_e64
658 ; GFX10-WAVE32-NEXT: v_nop_e64
659 ; GFX10-WAVE32-NEXT: v_nop_e64
660 ; GFX10-WAVE32-NEXT: v_nop_e64
661 ; GFX10-WAVE32-NEXT: v_nop_e64
662 ; GFX10-WAVE32-NEXT: ;;#ASMEND
663 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
664 ; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
665 ; GFX10-WAVE32-NEXT: ;;#ASMSTART
666 ; GFX10-WAVE32-NEXT: v_mov_b32_e64 v8, -1
667 ; GFX10-WAVE32-NEXT: ;;#ASMEND
668 ; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
669 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB8_4
670 ; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb
671 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
672 ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v8, off
673 ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
674 ; GFX10-WAVE32-NEXT: ;;#ASMSTART
675 ; GFX10-WAVE32-NEXT: v_mov_b32_e64 v9, -2
676 ; GFX10-WAVE32-NEXT: ;;#ASMEND
677 ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v9, off
678 ; GFX10-WAVE32-NEXT: s_endpgm
679 ; GFX10-WAVE32-NEXT: .LBB8_4:
680 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
681 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
682 ; GFX10-WAVE32-NEXT: s_endpgm
684 ; GFX11-LABEL: test_kill_control_flow_remainder:
685 ; GFX11: ; %bb.0: ; %entry
686 ; GFX11-NEXT: v_mov_b32_e32 v9, 0
687 ; GFX11-NEXT: s_cmp_lg_u32 s0, 0
688 ; GFX11-NEXT: s_cbranch_scc0 .LBB8_2
689 ; GFX11-NEXT: ; %bb.1: ; %exit
690 ; GFX11-NEXT: global_store_b32 v[0:1], v9, off
691 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
692 ; GFX11-NEXT: s_endpgm
693 ; GFX11-NEXT: .LBB8_2: ; %bb
694 ; GFX11-NEXT: ;;#ASMSTART
695 ; GFX11-NEXT: v_mov_b32_e64 v7, -1
696 ; GFX11-NEXT: v_nop_e64
697 ; GFX11-NEXT: v_nop_e64
698 ; GFX11-NEXT: v_nop_e64
699 ; GFX11-NEXT: v_nop_e64
700 ; GFX11-NEXT: v_nop_e64
701 ; GFX11-NEXT: v_nop_e64
702 ; GFX11-NEXT: v_nop_e64
703 ; GFX11-NEXT: v_nop_e64
704 ; GFX11-NEXT: v_nop_e64
705 ; GFX11-NEXT: v_nop_e64
706 ; GFX11-NEXT: v_nop_e64
707 ; GFX11-NEXT: ;;#ASMEND
708 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
709 ; GFX11-NEXT: s_mov_b64 s[2:3], exec
710 ; GFX11-NEXT: ;;#ASMSTART
711 ; GFX11-NEXT: v_mov_b32_e64 v8, -1
712 ; GFX11-NEXT: ;;#ASMEND
713 ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
714 ; GFX11-NEXT: s_cbranch_scc0 .LBB8_4
715 ; GFX11-NEXT: ; %bb.3: ; %bb
716 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
717 ; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc
718 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
719 ; GFX11-NEXT: ;;#ASMSTART
720 ; GFX11-NEXT: v_mov_b32_e64 v9, -2
721 ; GFX11-NEXT: ;;#ASMEND
722 ; GFX11-NEXT: global_store_b32 v[0:1], v9, off
723 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
724 ; GFX11-NEXT: s_endpgm
725 ; GFX11-NEXT: .LBB8_4:
726 ; GFX11-NEXT: s_mov_b64 exec, 0
727 ; GFX11-NEXT: exp mrt0 off, off, off, off done
728 ; GFX11-NEXT: s_endpgm
730 %cmp = icmp eq i32 %arg, 0
731 br i1 %cmp, label %bb, label %exit
734 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
745 v_nop_e64", "={v7}"()
746 %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
747 %cmp.var = fcmp olt float %var, 0.0
748 ; TODO: We could do an early-exit here (the branch above is uniform!)
749 call void @llvm.amdgcn.kill(i1 %cmp.var)
750 store volatile float %live.across, float addrspace(1)* undef
751 %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
755 %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
756 store float %phi, float addrspace(1)* undef
; Kill in the entry block, before a branch. llvm.amdgcn.kill is called with
; `icmp eq i32 %arg, 1`, then the function branches on `%arg == 0`; %bb
; produces %var via multi-line inline asm (v7) and %ret merges %var from %bb
; with 0.0 from %entry. The expected code builds the kill mask from the scalar
; compare (s_cselect + s_xor with exec) and performs the s_cbranch_scc0
; early-exit check before the second compare on s0.
760 define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
761 ; SI-LABEL: test_kill_control_flow_return:
762 ; SI: ; %bb.0: ; %entry
763 ; SI-NEXT: s_cmp_eq_u32 s0, 1
764 ; SI-NEXT: s_cselect_b64 s[4:5], -1, 0
765 ; SI-NEXT: s_mov_b64 s[2:3], exec
766 ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], exec
767 ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
768 ; SI-NEXT: s_cbranch_scc0 .LBB9_4
769 ; SI-NEXT: ; %bb.1: ; %entry
770 ; SI-NEXT: s_and_b64 exec, exec, s[2:3]
771 ; SI-NEXT: s_cmp_lg_u32 s0, 0
772 ; SI-NEXT: v_mov_b32_e32 v0, 0
773 ; SI-NEXT: s_cbranch_scc0 .LBB9_3
774 ; SI-NEXT: ; %bb.2: ; %exit
775 ; SI-NEXT: s_branch .LBB9_5
776 ; SI-NEXT: .LBB9_3: ; %bb
777 ; SI-NEXT: ;;#ASMSTART
778 ; SI-NEXT: v_mov_b32_e64 v7, -1
790 ; SI-NEXT: v_mov_b32_e32 v0, v7
791 ; SI-NEXT: s_branch .LBB9_5
793 ; SI-NEXT: s_mov_b64 exec, 0
794 ; SI-NEXT: exp null off, off, off, off done vm
798 ; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
799 ; GFX10-WAVE64: ; %bb.0: ; %entry
800 ; GFX10-WAVE64-NEXT: s_cmp_eq_u32 s0, 1
801 ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
802 ; GFX10-WAVE64-NEXT: s_cselect_b64 s[4:5], -1, 0
803 ; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], s[4:5], exec
804 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
805 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_4
806 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %entry
807 ; GFX10-WAVE64-NEXT: s_and_b64 exec, exec, s[2:3]
808 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 0
809 ; GFX10-WAVE64-NEXT: s_cmp_lg_u32 s0, 0
810 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB9_3
811 ; GFX10-WAVE64-NEXT: ; %bb.2: ; %exit
812 ; GFX10-WAVE64-NEXT: s_branch .LBB9_5
813 ; GFX10-WAVE64-NEXT: .LBB9_3: ; %bb
814 ; GFX10-WAVE64-NEXT: ;;#ASMSTART
815 ; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
816 ; GFX10-WAVE64-NEXT: v_nop_e64
817 ; GFX10-WAVE64-NEXT: v_nop_e64
818 ; GFX10-WAVE64-NEXT: v_nop_e64
819 ; GFX10-WAVE64-NEXT: v_nop_e64
820 ; GFX10-WAVE64-NEXT: v_nop_e64
821 ; GFX10-WAVE64-NEXT: v_nop_e64
822 ; GFX10-WAVE64-NEXT: v_nop_e64
823 ; GFX10-WAVE64-NEXT: v_nop_e64
824 ; GFX10-WAVE64-NEXT: v_nop_e64
825 ; GFX10-WAVE64-NEXT: v_nop_e64
826 ; GFX10-WAVE64-NEXT: ;;#ASMEND
827 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, v7
828 ; GFX10-WAVE64-NEXT: s_branch .LBB9_5
829 ; GFX10-WAVE64-NEXT: .LBB9_4:
830 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
831 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
832 ; GFX10-WAVE64-NEXT: s_endpgm
833 ; GFX10-WAVE64-NEXT: .LBB9_5:
835 ; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
836 ; GFX10-WAVE32: ; %bb.0: ; %entry
837 ; GFX10-WAVE32-NEXT: s_cmp_eq_u32 s0, 1
838 ; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
839 ; GFX10-WAVE32-NEXT: s_cselect_b32 s2, -1, 0
840 ; GFX10-WAVE32-NEXT: s_xor_b32 s2, s2, exec_lo
841 ; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, s2
842 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_4
843 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %entry
844 ; GFX10-WAVE32-NEXT: s_and_b32 exec_lo, exec_lo, s1
845 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 0
846 ; GFX10-WAVE32-NEXT: s_cmp_lg_u32 s0, 0
847 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB9_3
848 ; GFX10-WAVE32-NEXT: ; %bb.2: ; %exit
849 ; GFX10-WAVE32-NEXT: s_branch .LBB9_5
850 ; GFX10-WAVE32-NEXT: .LBB9_3: ; %bb
851 ; GFX10-WAVE32-NEXT: ;;#ASMSTART
852 ; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
853 ; GFX10-WAVE32-NEXT: v_nop_e64
854 ; GFX10-WAVE32-NEXT: v_nop_e64
855 ; GFX10-WAVE32-NEXT: v_nop_e64
856 ; GFX10-WAVE32-NEXT: v_nop_e64
857 ; GFX10-WAVE32-NEXT: v_nop_e64
858 ; GFX10-WAVE32-NEXT: v_nop_e64
859 ; GFX10-WAVE32-NEXT: v_nop_e64
860 ; GFX10-WAVE32-NEXT: v_nop_e64
861 ; GFX10-WAVE32-NEXT: v_nop_e64
862 ; GFX10-WAVE32-NEXT: v_nop_e64
863 ; GFX10-WAVE32-NEXT: ;;#ASMEND
864 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, v7
865 ; GFX10-WAVE32-NEXT: s_branch .LBB9_5
866 ; GFX10-WAVE32-NEXT: .LBB9_4:
867 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
868 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
869 ; GFX10-WAVE32-NEXT: s_endpgm
870 ; GFX10-WAVE32-NEXT: .LBB9_5:
872 ; GFX11-LABEL: test_kill_control_flow_return:
873 ; GFX11: ; %bb.0: ; %entry
874 ; GFX11-NEXT: s_cmp_eq_u32 s0, 1
875 ; GFX11-NEXT: s_mov_b64 s[2:3], exec
876 ; GFX11-NEXT: s_cselect_b64 s[4:5], -1, 0
877 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
878 ; GFX11-NEXT: s_xor_b64 s[4:5], s[4:5], exec
879 ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], s[4:5]
880 ; GFX11-NEXT: s_cbranch_scc0 .LBB9_4
881 ; GFX11-NEXT: ; %bb.1: ; %entry
882 ; GFX11-NEXT: s_and_b64 exec, exec, s[2:3]
883 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
884 ; GFX11-NEXT: s_cmp_lg_u32 s0, 0
885 ; GFX11-NEXT: s_cbranch_scc0 .LBB9_3
886 ; GFX11-NEXT: ; %bb.2: ; %exit
887 ; GFX11-NEXT: s_branch .LBB9_5
888 ; GFX11-NEXT: .LBB9_3: ; %bb
889 ; GFX11-NEXT: ;;#ASMSTART
890 ; GFX11-NEXT: v_mov_b32_e64 v7, -1
891 ; GFX11-NEXT: v_nop_e64
892 ; GFX11-NEXT: v_nop_e64
893 ; GFX11-NEXT: v_nop_e64
894 ; GFX11-NEXT: v_nop_e64
895 ; GFX11-NEXT: v_nop_e64
896 ; GFX11-NEXT: v_nop_e64
897 ; GFX11-NEXT: v_nop_e64
898 ; GFX11-NEXT: v_nop_e64
899 ; GFX11-NEXT: v_nop_e64
900 ; GFX11-NEXT: v_nop_e64
901 ; GFX11-NEXT: ;;#ASMEND
902 ; GFX11-NEXT: v_mov_b32_e32 v0, v7
903 ; GFX11-NEXT: s_branch .LBB9_5
904 ; GFX11-NEXT: .LBB9_4:
905 ; GFX11-NEXT: s_mov_b64 exec, 0
906 ; GFX11-NEXT: exp mrt0 off, off, off, off done
907 ; GFX11-NEXT: s_endpgm
908 ; GFX11-NEXT: .LBB9_5:
910 %kill = icmp eq i32 %arg, 1
911 %cmp = icmp eq i32 %arg, 0
912 call void @llvm.amdgcn.kill(i1 %kill)
913 br i1 %cmp, label %bb, label %exit
916 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
926 v_nop_e64", "={v7}"()
930 %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
; Test: conditional llvm.amdgcn.kill inside a loop that is entered on a
; per-lane condition (%arg == 0, compared against v0 in the checked output).
; Each iteration produces %var in v7 via side-effecting inline asm, kills on
; the result of "%var < 0.0", then volatile-loads the value that decides
; whether the loop repeats. After the loop a volatile store of 8 is issued.
; In the checked output for every run line the kill updates a saved copy of
; exec and branches on scc0 to a trailing block that zeroes exec and issues
; a "done" export before s_endpgm.
; The assertion lines below are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
934 define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
935 ; SI-LABEL: test_kill_divergent_loop:
936 ; SI: ; %bb.0: ; %entry
937 ; SI-NEXT: s_mov_b64 s[0:1], exec
938 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
939 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
940 ; SI-NEXT: s_xor_b64 s[4:5], exec, s[2:3]
941 ; SI-NEXT: s_cbranch_execz .LBB10_4
942 ; SI-NEXT: ; %bb.1: ; %bb.preheader
943 ; SI-NEXT: s_mov_b32 s3, 0xf000
944 ; SI-NEXT: s_mov_b32 s2, -1
945 ; SI-NEXT: .LBB10_2: ; %bb
946 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
947 ; SI-NEXT: ;;#ASMSTART
948 ; SI-NEXT: v_mov_b32_e64 v7, -1
960 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
961 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
962 ; SI-NEXT: s_cbranch_scc0 .LBB10_5
963 ; SI-NEXT: ; %bb.3: ; %bb
964 ; SI-NEXT: ; in Loop: Header=BB10_2 Depth=1
965 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
966 ; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
967 ; SI-NEXT: s_waitcnt vmcnt(0)
968 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
969 ; SI-NEXT: s_cbranch_vccnz .LBB10_2
970 ; SI-NEXT: .LBB10_4: ; %Flow1
971 ; SI-NEXT: s_or_b64 exec, exec, s[4:5]
972 ; SI-NEXT: s_mov_b32 s3, 0xf000
973 ; SI-NEXT: s_mov_b32 s2, -1
974 ; SI-NEXT: v_mov_b32_e32 v0, 8
975 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
976 ; SI-NEXT: s_waitcnt vmcnt(0)
979 ; SI-NEXT: s_mov_b64 exec, 0
980 ; SI-NEXT: exp null off, off, off, off done vm
983 ; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
984 ; GFX10-WAVE64: ; %bb.0: ; %entry
985 ; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
986 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
987 ; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
988 ; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
989 ; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB10_3
990 ; GFX10-WAVE64-NEXT: .LBB10_1: ; %bb
991 ; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
992 ; GFX10-WAVE64-NEXT: ;;#ASMSTART
993 ; GFX10-WAVE64-NEXT: v_mov_b32_e64 v7, -1
994 ; GFX10-WAVE64-NEXT: v_nop_e64
995 ; GFX10-WAVE64-NEXT: v_nop_e64
996 ; GFX10-WAVE64-NEXT: v_nop_e64
997 ; GFX10-WAVE64-NEXT: v_nop_e64
998 ; GFX10-WAVE64-NEXT: v_nop_e64
999 ; GFX10-WAVE64-NEXT: v_nop_e64
1000 ; GFX10-WAVE64-NEXT: v_nop_e64
1001 ; GFX10-WAVE64-NEXT: v_nop_e64
1002 ; GFX10-WAVE64-NEXT: v_nop_e64
1003 ; GFX10-WAVE64-NEXT: v_nop_e64
1004 ; GFX10-WAVE64-NEXT: ;;#ASMEND
1005 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
1006 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], vcc
1007 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB10_4
1008 ; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb
1009 ; GFX10-WAVE64-NEXT: ; in Loop: Header=BB10_1 Depth=1
1010 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
1011 ; GFX10-WAVE64-NEXT: global_load_dword v0, v[0:1], off glc dlc
1012 ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
1013 ; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1014 ; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB10_1
1015 ; GFX10-WAVE64-NEXT: .LBB10_3: ; %Flow1
1016 ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[2:3]
1017 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 8
1018 ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
1019 ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
1020 ; GFX10-WAVE64-NEXT: s_endpgm
1021 ; GFX10-WAVE64-NEXT: .LBB10_4:
1022 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1023 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
1024 ; GFX10-WAVE64-NEXT: s_endpgm
1026 ; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
1027 ; GFX10-WAVE32: ; %bb.0: ; %entry
1028 ; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1029 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
1030 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
1031 ; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
1032 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB10_3
1033 ; GFX10-WAVE32-NEXT: .LBB10_1: ; %bb
1034 ; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
1035 ; GFX10-WAVE32-NEXT: ;;#ASMSTART
1036 ; GFX10-WAVE32-NEXT: v_mov_b32_e64 v7, -1
1037 ; GFX10-WAVE32-NEXT: v_nop_e64
1038 ; GFX10-WAVE32-NEXT: v_nop_e64
1039 ; GFX10-WAVE32-NEXT: v_nop_e64
1040 ; GFX10-WAVE32-NEXT: v_nop_e64
1041 ; GFX10-WAVE32-NEXT: v_nop_e64
1042 ; GFX10-WAVE32-NEXT: v_nop_e64
1043 ; GFX10-WAVE32-NEXT: v_nop_e64
1044 ; GFX10-WAVE32-NEXT: v_nop_e64
1045 ; GFX10-WAVE32-NEXT: v_nop_e64
1046 ; GFX10-WAVE32-NEXT: v_nop_e64
1047 ; GFX10-WAVE32-NEXT: ;;#ASMEND
1048 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v7
1049 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, vcc_lo
1050 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB10_4
1051 ; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb
1052 ; GFX10-WAVE32-NEXT: ; in Loop: Header=BB10_1 Depth=1
1053 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
1054 ; GFX10-WAVE32-NEXT: global_load_dword v0, v[0:1], off glc dlc
1055 ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
1056 ; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
1057 ; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB10_1
1058 ; GFX10-WAVE32-NEXT: .LBB10_3: ; %Flow1
1059 ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s1
1060 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 8
1061 ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
1062 ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
1063 ; GFX10-WAVE32-NEXT: s_endpgm
1064 ; GFX10-WAVE32-NEXT: .LBB10_4:
1065 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1066 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
1067 ; GFX10-WAVE32-NEXT: s_endpgm
1069 ; GFX11-LABEL: test_kill_divergent_loop:
1070 ; GFX11: ; %bb.0: ; %entry
1071 ; GFX11-NEXT: s_mov_b64 s[0:1], exec
1072 ; GFX11-NEXT: s_mov_b64 s[2:3], exec
1073 ; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
1074 ; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
1075 ; GFX11-NEXT: s_cbranch_execz .LBB10_3
1076 ; GFX11-NEXT: .LBB10_1: ; %bb
1077 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
1078 ; GFX11-NEXT: ;;#ASMSTART
1079 ; GFX11-NEXT: v_mov_b32_e64 v7, -1
1080 ; GFX11-NEXT: v_nop_e64
1081 ; GFX11-NEXT: v_nop_e64
1082 ; GFX11-NEXT: v_nop_e64
1083 ; GFX11-NEXT: v_nop_e64
1084 ; GFX11-NEXT: v_nop_e64
1085 ; GFX11-NEXT: v_nop_e64
1086 ; GFX11-NEXT: v_nop_e64
1087 ; GFX11-NEXT: v_nop_e64
1088 ; GFX11-NEXT: v_nop_e64
1089 ; GFX11-NEXT: v_nop_e64
1090 ; GFX11-NEXT: ;;#ASMEND
1091 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v7
1092 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], vcc
1093 ; GFX11-NEXT: s_cbranch_scc0 .LBB10_4
1094 ; GFX11-NEXT: ; %bb.2: ; %bb
1095 ; GFX11-NEXT: ; in Loop: Header=BB10_1 Depth=1
1096 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
1097 ; GFX11-NEXT: global_load_b32 v0, v[0:1], off glc dlc
1098 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1099 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1100 ; GFX11-NEXT: s_cbranch_vccnz .LBB10_1
1101 ; GFX11-NEXT: .LBB10_3: ; %Flow1
1102 ; GFX11-NEXT: s_or_b64 exec, exec, s[2:3]
1103 ; GFX11-NEXT: v_mov_b32_e32 v0, 8
1104 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
1105 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1106 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1107 ; GFX11-NEXT: s_endpgm
1108 ; GFX11-NEXT: .LBB10_4:
1109 ; GFX11-NEXT: s_mov_b64 exec, 0
1110 ; GFX11-NEXT: exp mrt0 off, off, off, off done
1111 ; GFX11-NEXT: s_endpgm
1113 %cmp = icmp eq i32 %arg, 0
1114 br i1 %cmp, label %bb, label %exit
1117 %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
1127 v_nop_e64", "={v7}"()
; Kill condition: the checked output removes the lanes for which
; "%var < 0.0" does not hold (v_cmp_ngt followed by an and-not on the saved
; exec copy).
1128 %cmp.var = fcmp olt float %var, 0.0
1129 call void @llvm.amdgcn.kill(i1 %cmp.var)
; Volatile load so the loop-continue condition comes from memory each
; iteration.
1130 %vgpr = load volatile i32, i32 addrspace(1)* undef
1131 %loop.cond = icmp eq i32 %vgpr, 0
1132 br i1 %loop.cond, label %bb, label %exit
1135 store volatile i32 8, i32 addrspace(1)* undef
; Test: a value defined before llvm.amdgcn.kill (%tmp2) is used by a phi
; (%tmp5) in a block reached after the kill. %tmp2 is -1.0 when
; 0.0 < (%x + 1.0) and 0.0 otherwise; the kill condition is "%tmp2 < 0.0".
; The branch after the kill is on undef, and the two later blocks each issue
; a volatile store (8 and 9).
; The assertion lines below are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
1140 define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
1141 ; SI-LABEL: phi_use_def_before_kill:
1142 ; SI: ; %bb.0: ; %bb
1143 ; SI-NEXT: v_add_f32_e64 v1, s0, 1.0
1144 ; SI-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
1145 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
1146 ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
1147 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
1148 ; SI-NEXT: s_cbranch_scc0 .LBB11_6
1149 ; SI-NEXT: ; %bb.1: ; %bb
1150 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
1151 ; SI-NEXT: s_cbranch_scc0 .LBB11_3
1152 ; SI-NEXT: ; %bb.2: ; %bb8
1153 ; SI-NEXT: s_mov_b32 s3, 0xf000
1154 ; SI-NEXT: s_mov_b32 s2, -1
1155 ; SI-NEXT: v_mov_b32_e32 v0, 8
1156 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1157 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1158 ; SI-NEXT: v_mov_b32_e32 v0, 4.0
1159 ; SI-NEXT: .LBB11_3: ; %phibb
1160 ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
1161 ; SI-NEXT: s_cbranch_vccz .LBB11_5
1162 ; SI-NEXT: ; %bb.4: ; %bb10
1163 ; SI-NEXT: s_mov_b32 s3, 0xf000
1164 ; SI-NEXT: s_mov_b32 s2, -1
1165 ; SI-NEXT: v_mov_b32_e32 v0, 9
1166 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1167 ; SI-NEXT: s_waitcnt vmcnt(0)
1168 ; SI-NEXT: .LBB11_5: ; %end
1170 ; SI-NEXT: .LBB11_6:
1171 ; SI-NEXT: s_mov_b64 exec, 0
1172 ; SI-NEXT: exp null off, off, off, off done vm
1175 ; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
1176 ; GFX10-WAVE64: ; %bb.0: ; %bb
1177 ; GFX10-WAVE64-NEXT: v_add_f32_e64 v1, s0, 1.0
1178 ; GFX10-WAVE64-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
1179 ; GFX10-WAVE64-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
1180 ; GFX10-WAVE64-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
1181 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
1182 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_6
1183 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb
1184 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
1185 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB11_3
1186 ; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb8
1187 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v1, 8
1188 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 4.0
1189 ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v1, off
1190 ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
1191 ; GFX10-WAVE64-NEXT: .LBB11_3: ; %phibb
1192 ; GFX10-WAVE64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
1193 ; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB11_5
1194 ; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb10
1195 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
1196 ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
1197 ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
1198 ; GFX10-WAVE64-NEXT: .LBB11_5: ; %end
1199 ; GFX10-WAVE64-NEXT: s_endpgm
1200 ; GFX10-WAVE64-NEXT: .LBB11_6:
1201 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1202 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
1203 ; GFX10-WAVE64-NEXT: s_endpgm
1205 ; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
1206 ; GFX10-WAVE32: ; %bb.0: ; %bb
1207 ; GFX10-WAVE32-NEXT: v_add_f32_e64 v1, s0, 1.0
1208 ; GFX10-WAVE32-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v1
1209 ; GFX10-WAVE32-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
1210 ; GFX10-WAVE32-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0, v1
1211 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
1212 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_6
1213 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb
1214 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
1215 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB11_3
1216 ; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb8
1217 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v1, 8
1218 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 4.0
1219 ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v1, off
1220 ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
1221 ; GFX10-WAVE32-NEXT: .LBB11_3: ; %phibb
1222 ; GFX10-WAVE32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
1223 ; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB11_5
1224 ; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb10
1225 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
1226 ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
1227 ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
1228 ; GFX10-WAVE32-NEXT: .LBB11_5: ; %end
1229 ; GFX10-WAVE32-NEXT: s_endpgm
1230 ; GFX10-WAVE32-NEXT: .LBB11_6:
1231 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1232 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
1233 ; GFX10-WAVE32-NEXT: s_endpgm
1235 ; GFX11-LABEL: phi_use_def_before_kill:
1236 ; GFX11: ; %bb.0: ; %bb
1237 ; GFX11-NEXT: v_add_f32_e64 v1, s0, 1.0
1238 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1239 ; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
1240 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
1241 ; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
1242 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
1243 ; GFX11-NEXT: s_cbranch_scc0 .LBB11_6
1244 ; GFX11-NEXT: ; %bb.1: ; %bb
1245 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
1246 ; GFX11-NEXT: s_cbranch_scc0 .LBB11_3
1247 ; GFX11-NEXT: ; %bb.2: ; %bb8
1248 ; GFX11-NEXT: v_mov_b32_e32 v1, 8
1249 ; GFX11-NEXT: v_mov_b32_e32 v0, 4.0
1250 ; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc
1251 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1252 ; GFX11-NEXT: .LBB11_3: ; %phibb
1253 ; GFX11-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
1254 ; GFX11-NEXT: s_cbranch_vccz .LBB11_5
1255 ; GFX11-NEXT: ; %bb.4: ; %bb10
1256 ; GFX11-NEXT: v_mov_b32_e32 v0, 9
1257 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
1258 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1259 ; GFX11-NEXT: .LBB11_5: ; %end
1260 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1261 ; GFX11-NEXT: s_endpgm
1262 ; GFX11-NEXT: .LBB11_6:
1263 ; GFX11-NEXT: s_mov_b64 exec, 0
1264 ; GFX11-NEXT: exp mrt0 off, off, off, off done
1265 ; GFX11-NEXT: s_endpgm
1267 %tmp = fadd float %x, 1.000000e+00
1268 %tmp1 = fcmp olt float 0.000000e+00, %tmp
1269 %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
; %tmp2 feeds both the kill condition and the phi in %phibb, so it must stay
; available after the kill.
1270 %cmp.tmp2 = fcmp olt float %tmp2, 0.0
1271 call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
1272 br i1 undef, label %phibb, label %bb8
1275 %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
1276 %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
1277 br i1 %tmp6, label %bb10, label %end
1280 store volatile i32 8, i32 addrspace(1)* undef
1284 store volatile i32 9, i32 addrspace(1)* undef
; Test: llvm.amdgcn.kill(i1 false) in a CFG built from two "ult" float
; compares plus a constant "br i1 true". The checked output contains a
; %UnifiedUnreachableBlock and places the kill sequence in the block
; annotated %bb6.
; NOTE(review): judging by the name only, this presumably guards against
; emitting a skip branch for a block without successors -- confirm against
; the commit that introduced the test.
; The assertion lines below are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
1291 define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
1292 ; SI-LABEL: no_skip_no_successors:
1293 ; SI: ; %bb.0: ; %bb
1294 ; SI-NEXT: v_cmp_nge_f32_e64 s[4:5], s1, 0
1295 ; SI-NEXT: s_mov_b64 s[2:3], exec
1296 ; SI-NEXT: s_mov_b64 s[0:1], -1
1297 ; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
1298 ; SI-NEXT: s_cbranch_vccz .LBB12_3
1299 ; SI-NEXT: ; %bb.1: ; %Flow
1300 ; SI-NEXT: s_and_b64 vcc, exec, s[0:1]
1301 ; SI-NEXT: s_cbranch_vccnz .LBB12_4
1302 ; SI-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock
1303 ; SI-NEXT: .LBB12_3: ; %bb3
1304 ; SI-NEXT: s_branch .LBB12_2
1305 ; SI-NEXT: .LBB12_4: ; %bb6
1306 ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
1307 ; SI-NEXT: s_cbranch_scc0 .LBB12_6
1308 ; SI-NEXT: ; %bb.5: ; %bb6
1309 ; SI-NEXT: s_mov_b64 exec, 0
1310 ; SI-NEXT: .LBB12_6:
1311 ; SI-NEXT: s_mov_b64 exec, 0
1312 ; SI-NEXT: exp null off, off, off, off done vm
1315 ; GFX10-WAVE64-LABEL: no_skip_no_successors:
1316 ; GFX10-WAVE64: ; %bb.0: ; %bb
1317 ; GFX10-WAVE64-NEXT: v_cmp_nge_f32_e64 s[0:1], s1, 0
1318 ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
1319 ; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1]
1320 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], -1
1321 ; GFX10-WAVE64-NEXT: s_cbranch_vccz .LBB12_3
1322 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %Flow
1323 ; GFX10-WAVE64-NEXT: s_and_b64 vcc, exec, s[0:1]
1324 ; GFX10-WAVE64-NEXT: s_cbranch_vccnz .LBB12_4
1325 ; GFX10-WAVE64-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock
1326 ; GFX10-WAVE64-NEXT: .LBB12_3: ; %bb3
1327 ; GFX10-WAVE64-NEXT: s_branch .LBB12_2
1328 ; GFX10-WAVE64-NEXT: .LBB12_4: ; %bb6
1329 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
1330 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB12_6
1331 ; GFX10-WAVE64-NEXT: ; %bb.5: ; %bb6
1332 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1333 ; GFX10-WAVE64-NEXT: .LBB12_6:
1334 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1335 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
1336 ; GFX10-WAVE64-NEXT: s_endpgm
1338 ; GFX10-WAVE32-LABEL: no_skip_no_successors:
1339 ; GFX10-WAVE32: ; %bb.0: ; %bb
1340 ; GFX10-WAVE32-NEXT: v_cmp_nge_f32_e64 s1, s1, 0
1341 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
1342 ; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1
1343 ; GFX10-WAVE32-NEXT: s_mov_b32 s1, -1
1344 ; GFX10-WAVE32-NEXT: s_cbranch_vccz .LBB12_3
1345 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %Flow
1346 ; GFX10-WAVE32-NEXT: s_and_b32 vcc_lo, exec_lo, s1
1347 ; GFX10-WAVE32-NEXT: s_cbranch_vccnz .LBB12_4
1348 ; GFX10-WAVE32-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock
1349 ; GFX10-WAVE32-NEXT: .LBB12_3: ; %bb3
1350 ; GFX10-WAVE32-NEXT: s_branch .LBB12_2
1351 ; GFX10-WAVE32-NEXT: .LBB12_4: ; %bb6
1352 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
1353 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB12_6
1354 ; GFX10-WAVE32-NEXT: ; %bb.5: ; %bb6
1355 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1356 ; GFX10-WAVE32-NEXT: .LBB12_6:
1357 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1358 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
1359 ; GFX10-WAVE32-NEXT: s_endpgm
1361 ; GFX11-LABEL: no_skip_no_successors:
1362 ; GFX11: ; %bb.0: ; %bb
1363 ; GFX11-NEXT: v_cmp_nge_f32_e64 s[0:1], s1, 0
1364 ; GFX11-NEXT: s_mov_b64 s[2:3], exec
1365 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1366 ; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1]
1367 ; GFX11-NEXT: s_mov_b64 s[0:1], -1
1368 ; GFX11-NEXT: s_cbranch_vccz .LBB12_3
1369 ; GFX11-NEXT: ; %bb.1: ; %Flow
1370 ; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1]
1371 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_4
1372 ; GFX11-NEXT: .LBB12_2: ; %UnifiedUnreachableBlock
1373 ; GFX11-NEXT: .LBB12_3: ; %bb3
1374 ; GFX11-NEXT: s_branch .LBB12_2
1375 ; GFX11-NEXT: .LBB12_4: ; %bb6
1376 ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
1377 ; GFX11-NEXT: s_cbranch_scc0 .LBB12_6
1378 ; GFX11-NEXT: ; %bb.5: ; %bb6
1379 ; GFX11-NEXT: s_mov_b64 exec, 0
1380 ; GFX11-NEXT: .LBB12_6:
1381 ; GFX11-NEXT: s_mov_b64 exec, 0
1382 ; GFX11-NEXT: exp mrt0 off, off, off, off done
1383 ; GFX11-NEXT: s_endpgm
1385 %tmp = fcmp ult float %arg1, 0.000000e+00
1386 br i1 %tmp, label %bb6, label %bb3
1389 %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
1390 br i1 %tmp2, label %bb5, label %bb4
; Constant-true condition: the %bb7 edge is never taken.
1393 br i1 true, label %bb5, label %bb7
1395 bb5: ; preds = %bb4, %bb3
; Constant-false kill condition: the checked output and-nots the whole of
; exec out of the saved live mask.
1399 call void @llvm.amdgcn.kill(i1 false)
; Test: a conditional kill in one branch arm (%bb3, taken when
; "%arg1 ult 0.0") followed by a join block (%bb4) that performs an image
; sample and then branches again on the sampled value. The kill condition is
; "%arg < 0.0". The checked output starts by saving exec and switching to
; whole-quad mode (s_wqm), and one of the later blocks issues a volatile
; store of 9.
; The assertion lines below are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
1406 define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
1407 ; SI-LABEL: if_after_kill_block:
1408 ; SI: ; %bb.0: ; %bb
1409 ; SI-NEXT: s_mov_b64 s[2:3], exec
1410 ; SI-NEXT: s_wqm_b64 exec, exec
1411 ; SI-NEXT: s_mov_b32 s0, 0
1412 ; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
1413 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
1414 ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
1415 ; SI-NEXT: s_cbranch_execz .LBB13_3
1416 ; SI-NEXT: ; %bb.1: ; %bb3
1417 ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
1418 ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
1419 ; SI-NEXT: s_cbranch_scc0 .LBB13_6
1420 ; SI-NEXT: ; %bb.2: ; %bb3
1421 ; SI-NEXT: s_andn2_b64 exec, exec, vcc
1422 ; SI-NEXT: .LBB13_3: ; %bb4
1423 ; SI-NEXT: s_or_b64 exec, exec, s[4:5]
1424 ; SI-NEXT: s_mov_b32 s1, s0
1425 ; SI-NEXT: s_mov_b32 s2, s0
1426 ; SI-NEXT: s_mov_b32 s3, s0
1427 ; SI-NEXT: s_mov_b32 s4, s0
1428 ; SI-NEXT: s_mov_b32 s5, s0
1429 ; SI-NEXT: s_mov_b32 s6, s0
1430 ; SI-NEXT: s_mov_b32 s7, s0
1431 ; SI-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
1432 ; SI-NEXT: s_waitcnt vmcnt(0)
1433 ; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
1434 ; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
1435 ; SI-NEXT: s_cbranch_execz .LBB13_5
1436 ; SI-NEXT: ; %bb.4: ; %bb8
1437 ; SI-NEXT: s_mov_b32 s3, 0xf000
1438 ; SI-NEXT: s_mov_b32 s2, -1
1439 ; SI-NEXT: v_mov_b32_e32 v0, 9
1440 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
1441 ; SI-NEXT: s_waitcnt vmcnt(0)
1442 ; SI-NEXT: .LBB13_5: ; %UnifiedReturnBlock
1444 ; SI-NEXT: .LBB13_6:
1445 ; SI-NEXT: s_mov_b64 exec, 0
1446 ; SI-NEXT: exp null off, off, off, off done vm
1449 ; GFX10-WAVE64-LABEL: if_after_kill_block:
1450 ; GFX10-WAVE64: ; %bb.0: ; %bb
1451 ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
1452 ; GFX10-WAVE64-NEXT: s_wqm_b64 exec, exec
1453 ; GFX10-WAVE64-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
1454 ; GFX10-WAVE64-NEXT: s_mov_b32 s0, 0
1455 ; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
1456 ; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
1457 ; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_3
1458 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3
1459 ; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
1460 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
1461 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB13_6
1462 ; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3
1463 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
1464 ; GFX10-WAVE64-NEXT: .LBB13_3: ; %bb4
1465 ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
1466 ; GFX10-WAVE64-NEXT: s_mov_b32 s1, s0
1467 ; GFX10-WAVE64-NEXT: s_mov_b32 s2, s0
1468 ; GFX10-WAVE64-NEXT: s_mov_b32 s3, s0
1469 ; GFX10-WAVE64-NEXT: s_mov_b32 s4, s0
1470 ; GFX10-WAVE64-NEXT: s_mov_b32 s5, s0
1471 ; GFX10-WAVE64-NEXT: s_mov_b32 s6, s0
1472 ; GFX10-WAVE64-NEXT: s_mov_b32 s7, s0
1473 ; GFX10-WAVE64-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1474 ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
1475 ; GFX10-WAVE64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
1476 ; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[0:1], vcc
1477 ; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB13_5
1478 ; GFX10-WAVE64-NEXT: ; %bb.4: ; %bb8
1479 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v0, 9
1480 ; GFX10-WAVE64-NEXT: global_store_dword v[0:1], v0, off
1481 ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
1482 ; GFX10-WAVE64-NEXT: .LBB13_5: ; %UnifiedReturnBlock
1483 ; GFX10-WAVE64-NEXT: s_endpgm
1484 ; GFX10-WAVE64-NEXT: .LBB13_6:
1485 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1486 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
1487 ; GFX10-WAVE64-NEXT: s_endpgm
1489 ; GFX10-WAVE32-LABEL: if_after_kill_block:
1490 ; GFX10-WAVE32: ; %bb.0: ; %bb
1491 ; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
1492 ; GFX10-WAVE32-NEXT: s_wqm_b32 exec_lo, exec_lo
1493 ; GFX10-WAVE32-NEXT: v_cmp_nle_f32_e32 vcc_lo, 0, v1
1494 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
1495 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s2, vcc_lo
1496 ; GFX10-WAVE32-NEXT: s_xor_b32 s2, exec_lo, s2
1497 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_3
1498 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3
1499 ; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
1500 ; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
1501 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB13_6
1502 ; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3
1503 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
1504 ; GFX10-WAVE32-NEXT: .LBB13_3: ; %bb4
1505 ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s2
1506 ; GFX10-WAVE32-NEXT: s_mov_b32 s1, s0
1507 ; GFX10-WAVE32-NEXT: s_mov_b32 s2, s0
1508 ; GFX10-WAVE32-NEXT: s_mov_b32 s3, s0
1509 ; GFX10-WAVE32-NEXT: s_mov_b32 s4, s0
1510 ; GFX10-WAVE32-NEXT: s_mov_b32 s5, s0
1511 ; GFX10-WAVE32-NEXT: s_mov_b32 s6, s0
1512 ; GFX10-WAVE32-NEXT: s_mov_b32 s7, s0
1513 ; GFX10-WAVE32-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1514 ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
1515 ; GFX10-WAVE32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0
1516 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s0, vcc_lo
1517 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB13_5
1518 ; GFX10-WAVE32-NEXT: ; %bb.4: ; %bb8
1519 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v0, 9
1520 ; GFX10-WAVE32-NEXT: global_store_dword v[0:1], v0, off
1521 ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
1522 ; GFX10-WAVE32-NEXT: .LBB13_5: ; %UnifiedReturnBlock
1523 ; GFX10-WAVE32-NEXT: s_endpgm
1524 ; GFX10-WAVE32-NEXT: .LBB13_6:
1525 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1526 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
1527 ; GFX10-WAVE32-NEXT: s_endpgm
1529 ; GFX11-LABEL: if_after_kill_block:
1530 ; GFX11: ; %bb.0: ; %bb
1531 ; GFX11-NEXT: s_mov_b64 s[2:3], exec
1532 ; GFX11-NEXT: s_wqm_b64 exec, exec
1533 ; GFX11-NEXT: s_mov_b32 s0, 0
1534 ; GFX11-NEXT: s_mov_b64 s[4:5], exec
1535 ; GFX11-NEXT: v_cmpx_nle_f32_e32 0, v1
1536 ; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
1537 ; GFX11-NEXT: s_cbranch_execz .LBB13_3
1538 ; GFX11-NEXT: ; %bb.1: ; %bb3
1539 ; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
1540 ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
1541 ; GFX11-NEXT: s_cbranch_scc0 .LBB13_6
1542 ; GFX11-NEXT: ; %bb.2: ; %bb3
1543 ; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
1544 ; GFX11-NEXT: .LBB13_3: ; %bb4
1545 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1546 ; GFX11-NEXT: s_or_b64 exec, exec, s[4:5]
1547 ; GFX11-NEXT: s_mov_b32 s1, s0
1548 ; GFX11-NEXT: s_mov_b32 s2, s0
1549 ; GFX11-NEXT: s_mov_b32 s3, s0
1550 ; GFX11-NEXT: s_mov_b32 s4, s0
1551 ; GFX11-NEXT: s_mov_b32 s5, s0
1552 ; GFX11-NEXT: s_mov_b32 s6, s0
1553 ; GFX11-NEXT: s_mov_b32 s7, s0
1554 ; GFX11-NEXT: image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1555 ; GFX11-NEXT: s_mov_b64 s[0:1], exec
1556 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1557 ; GFX11-NEXT: v_cmpx_neq_f32_e32 0, v0
1558 ; GFX11-NEXT: s_cbranch_execz .LBB13_5
1559 ; GFX11-NEXT: ; %bb.4: ; %bb8
1560 ; GFX11-NEXT: v_mov_b32_e32 v0, 9
1561 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
1562 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1563 ; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock
1564 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1565 ; GFX11-NEXT: s_endpgm
1566 ; GFX11-NEXT: .LBB13_6:
1567 ; GFX11-NEXT: s_mov_b64 exec, 0
1568 ; GFX11-NEXT: exp mrt0 off, off, off, off done
1569 ; GFX11-NEXT: s_endpgm
1571 %tmp = fcmp ult float %arg1, 0.000000e+00
1572 br i1 %tmp, label %bb3, label %bb4
; Kill arm: the checked output removes the lanes for which "%arg < 0.0" does
; not hold (v_cmp_ngt followed by an and-not on the saved exec copy).
1575 %cmp.arg = fcmp olt float %arg, 0.0
1576 call void @llvm.amdgcn.kill(i1 %cmp.arg)
1579 bb4: ; preds = %bb3, %bb
; The image sample after the join is what the second branch depends on.
1580 %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
1581 %tmp6 = extractelement <4 x float> %tmp5, i32 0
1582 %tmp7 = fcmp une float %tmp6, 0.000000e+00
1583 br i1 %tmp7, label %bb8, label %bb9
1585 bb8: ; preds = %bb9, %bb4
1586 store volatile i32 9, i32 addrspace(1)* undef
; Test: a conditional branch on a sampled image value selects between a
; %live arm (multiplies %val0 by the sample) and a %kill arm that calls
; llvm.amdgcn.kill(i1 false). Both arms feed a phi (%proxy, undef from the
; kill arm) that is exported with llvm.amdgcn.exp.f32.
; Unlike the surrounding tests, this definition carries no "#0" attribute
; group.
; The assertion lines below are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
1593 define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
1594 ; SI-LABEL: cbranch_kill:
1595 ; SI: ; %bb.0: ; %.entry
1596 ; SI-NEXT: s_mov_b32 s4, 0
1597 ; SI-NEXT: s_mov_b64 s[0:1], exec
1598 ; SI-NEXT: v_mov_b32_e32 v4, 0
1599 ; SI-NEXT: v_mov_b32_e32 v2, v1
1600 ; SI-NEXT: v_mov_b32_e32 v3, v1
1601 ; SI-NEXT: s_mov_b32 s5, s4
1602 ; SI-NEXT: s_mov_b32 s6, s4
1603 ; SI-NEXT: s_mov_b32 s7, s4
1604 ; SI-NEXT: s_mov_b32 s8, s4
1605 ; SI-NEXT: s_mov_b32 s9, s4
1606 ; SI-NEXT: s_mov_b32 s10, s4
1607 ; SI-NEXT: s_mov_b32 s11, s4
1608 ; SI-NEXT: image_sample_l v1, v[1:4], s[4:11], s[0:3] dmask:0x1 da
1609 ; SI-NEXT: s_waitcnt vmcnt(0)
1610 ; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
1611 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
1612 ; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
1613 ; SI-NEXT: s_cbranch_execz .LBB14_3
1614 ; SI-NEXT: ; %bb.1: ; %kill
1615 ; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
1616 ; SI-NEXT: ; implicit-def: $vgpr0
1617 ; SI-NEXT: ; implicit-def: $vgpr1
1618 ; SI-NEXT: s_cbranch_scc0 .LBB14_6
1619 ; SI-NEXT: ; %bb.2: ; %kill
1620 ; SI-NEXT: s_mov_b64 exec, 0
1621 ; SI-NEXT: .LBB14_3: ; %Flow
1622 ; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
1623 ; SI-NEXT: ; implicit-def: $vgpr2
1624 ; SI-NEXT: s_xor_b64 exec, exec, s[0:1]
1625 ; SI-NEXT: ; %bb.4: ; %live
1626 ; SI-NEXT: v_mul_f32_e32 v2, v0, v1
1627 ; SI-NEXT: ; %bb.5: ; %export
1628 ; SI-NEXT: s_or_b64 exec, exec, s[0:1]
1629 ; SI-NEXT: exp mrt0 v2, v2, v2, v2 done vm
1631 ; SI-NEXT: .LBB14_6:
1632 ; SI-NEXT: s_mov_b64 exec, 0
1633 ; SI-NEXT: exp null off, off, off, off done vm
1636 ; GFX10-WAVE64-LABEL: cbranch_kill:
1637 ; GFX10-WAVE64: ; %bb.0: ; %.entry
1638 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, 0
1639 ; GFX10-WAVE64-NEXT: s_mov_b32 s4, 0
1640 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec
1641 ; GFX10-WAVE64-NEXT: s_mov_b32 s5, s4
1642 ; GFX10-WAVE64-NEXT: s_mov_b32 s6, s4
1643 ; GFX10-WAVE64-NEXT: s_mov_b32 s7, s4
1644 ; GFX10-WAVE64-NEXT: s_mov_b32 s8, s4
1645 ; GFX10-WAVE64-NEXT: s_mov_b32 s9, s4
1646 ; GFX10-WAVE64-NEXT: s_mov_b32 s10, s4
1647 ; GFX10-WAVE64-NEXT: s_mov_b32 s11, s4
1648 ; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1649 ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0)
1650 ; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
1651 ; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
1652 ; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
1653 ; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB14_3
1654 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill
1655 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
1656 ; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr0
1657 ; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr1
1658 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB14_6
1659 ; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill
1660 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1661 ; GFX10-WAVE64-NEXT: .LBB14_3: ; %Flow
1662 ; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
1663 ; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2
1664 ; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1]
1665 ; GFX10-WAVE64-NEXT: ; %bb.4: ; %live
1666 ; GFX10-WAVE64-NEXT: v_mul_f32_e32 v2, v0, v1
1667 ; GFX10-WAVE64-NEXT: ; %bb.5: ; %export
1668 ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
1669 ; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v2, v2 done vm
1670 ; GFX10-WAVE64-NEXT: s_endpgm
1671 ; GFX10-WAVE64-NEXT: .LBB14_6:
1672 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1673 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
1674 ; GFX10-WAVE64-NEXT: s_endpgm
1676 ; GFX10-WAVE32-LABEL: cbranch_kill:
1677 ; GFX10-WAVE32: ; %bb.0: ; %.entry
1678 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, 0
1679 ; GFX10-WAVE32-NEXT: s_mov_b32 s4, 0
1680 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo
1681 ; GFX10-WAVE32-NEXT: s_mov_b32 s5, s4
1682 ; GFX10-WAVE32-NEXT: s_mov_b32 s6, s4
1683 ; GFX10-WAVE32-NEXT: s_mov_b32 s7, s4
1684 ; GFX10-WAVE32-NEXT: s_mov_b32 s8, s4
1685 ; GFX10-WAVE32-NEXT: s_mov_b32 s9, s4
1686 ; GFX10-WAVE32-NEXT: s_mov_b32 s10, s4
1687 ; GFX10-WAVE32-NEXT: s_mov_b32 s11, s4
1688 ; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1689 ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0)
1690 ; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1
1691 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
1692 ; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
1693 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB14_3
1694 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill
1695 ; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
1696 ; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr0
1697 ; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr1
1698 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB14_6
1699 ; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill
1700 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1701 ; GFX10-WAVE32-NEXT: .LBB14_3: ; %Flow
1702 ; GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1
1703 ; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2
1704 ; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0
1705 ; GFX10-WAVE32-NEXT: ; %bb.4: ; %live
1706 ; GFX10-WAVE32-NEXT: v_mul_f32_e32 v2, v0, v1
1707 ; GFX10-WAVE32-NEXT: ; %bb.5: ; %export
1708 ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
1709 ; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v2, v2 done vm
1710 ; GFX10-WAVE32-NEXT: s_endpgm
1711 ; GFX10-WAVE32-NEXT: .LBB14_6:
1712 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1713 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
1714 ; GFX10-WAVE32-NEXT: s_endpgm
1716 ; GFX11-LABEL: cbranch_kill:
1717 ; GFX11: ; %bb.0: ; %.entry
1718 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1719 ; GFX11-NEXT: s_mov_b32 s4, 0
1720 ; GFX11-NEXT: s_mov_b64 s[0:1], exec
1721 ; GFX11-NEXT: s_mov_b32 s5, s4
1722 ; GFX11-NEXT: s_mov_b32 s6, s4
1723 ; GFX11-NEXT: s_mov_b32 s7, s4
1724 ; GFX11-NEXT: s_mov_b32 s8, s4
1725 ; GFX11-NEXT: s_mov_b32 s9, s4
1726 ; GFX11-NEXT: s_mov_b32 s10, s4
1727 ; GFX11-NEXT: s_mov_b32 s11, s4
1728 ; GFX11-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1729 ; GFX11-NEXT: s_mov_b64 s[2:3], exec
1730 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1731 ; GFX11-NEXT: v_cmpx_ge_f32_e32 0, v1
1732 ; GFX11-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
1733 ; GFX11-NEXT: s_cbranch_execz .LBB14_3
1734 ; GFX11-NEXT: ; %bb.1: ; %kill
1735 ; GFX11-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
1736 ; GFX11-NEXT: ; implicit-def: $vgpr0
1737 ; GFX11-NEXT: ; implicit-def: $vgpr1
1738 ; GFX11-NEXT: s_cbranch_scc0 .LBB14_6
1739 ; GFX11-NEXT: ; %bb.2: ; %kill
1740 ; GFX11-NEXT: s_mov_b64 exec, 0
1741 ; GFX11-NEXT: .LBB14_3: ; %Flow
1742 ; GFX11-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
1743 ; GFX11-NEXT: ; implicit-def: $vgpr2
1744 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1745 ; GFX11-NEXT: s_xor_b64 exec, exec, s[0:1]
1746 ; GFX11-NEXT: ; %bb.4: ; %live
1747 ; GFX11-NEXT: v_mul_f32_e32 v2, v0, v1
1748 ; GFX11-NEXT: ; %bb.5: ; %export
1749 ; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
1750 ; GFX11-NEXT: exp mrt0 v2, v2, v2, v2 done
1751 ; GFX11-NEXT: s_endpgm
1752 ; GFX11-NEXT: .LBB14_6:
1753 ; GFX11-NEXT: s_mov_b64 exec, 0
1754 ; GFX11-NEXT: exp mrt0 off, off, off, off done
1755 ; GFX11-NEXT: s_endpgm
; Lanes whose sample is greater than 0.0 or unordered ("ugt") go to %live;
; all others go to %kill.
1757 %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
1758 %cond0 = fcmp ugt float %sample, 0.000000e+00
1759 br i1 %cond0, label %live, label %kill
; Constant-false kill condition: the checked output and-nots the current exec
; out of the saved live mask and then zeroes exec in the %kill block.
1762 call void @llvm.amdgcn.kill(i1 false)
1766 %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
; The kill arm contributes undef to the exported value.
1770 %proxy = phi float [ undef, %kill ], [ %scale, %live ]
1771 call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
; complex_loop: pixel shader with a constant-false llvm.amdgcn.kill reached
; from inside a counted loop.
;
; The loop is entered only when %cmpa is greater than zero (signed). On each
; iteration the counter %ctr is compared (unsigned) against %cmpb; when it is
; greater, control goes to %kill, otherwise straight to %latch. The latch
; increments the counter and keeps looping while it is below %cmpc (signed).
; The final export writes -1 if the loop was bypassed, or the last counter
; value otherwise.
;
; In the expected assembly for every run line, a copy of exec taken before the
; loop is reduced by the lanes that reach the kill, and an s_cbranch_scc0 leads
; to a separate block that zeroes exec and issues an empty export (a null
; export, or mrt0 with all channels off on the gfx1100 run).
1776 define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
1777 ; SI-LABEL: complex_loop:
1778 ; SI: ; %bb.0: ; %.entry
1779 ; SI-NEXT: s_cmp_lt_i32 s0, 1
1780 ; SI-NEXT: s_cbranch_scc1 .LBB15_7
1781 ; SI-NEXT: ; %bb.1: ; %.lr.ph
1782 ; SI-NEXT: s_mov_b64 s[2:3], exec
1783 ; SI-NEXT: s_mov_b32 s6, 0
1784 ; SI-NEXT: s_mov_b64 s[0:1], 0
1785 ; SI-NEXT: s_branch .LBB15_3
1786 ; SI-NEXT: .LBB15_2: ; %latch
1787 ; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
1788 ; SI-NEXT: s_or_b64 exec, exec, s[4:5]
1789 ; SI-NEXT: s_add_i32 s6, s6, 1
1790 ; SI-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
1791 ; SI-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
1792 ; SI-NEXT: v_mov_b32_e32 v2, s6
1793 ; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
1794 ; SI-NEXT: s_cbranch_execz .LBB15_6
1795 ; SI-NEXT: .LBB15_3: ; %hdr
1796 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1
1797 ; SI-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
1798 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
1799 ; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
1800 ; SI-NEXT: s_cbranch_execz .LBB15_2
1801 ; SI-NEXT: ; %bb.4: ; %kill
1802 ; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
1803 ; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
1804 ; SI-NEXT: s_cbranch_scc0 .LBB15_8
1805 ; SI-NEXT: ; %bb.5: ; %kill
1806 ; SI-NEXT: ; in Loop: Header=BB15_3 Depth=1
1807 ; SI-NEXT: s_mov_b64 exec, 0
1808 ; SI-NEXT: s_branch .LBB15_2
1809 ; SI-NEXT: .LBB15_6: ; %Flow
1810 ; SI-NEXT: s_or_b64 exec, exec, s[0:1]
1811 ; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
1813 ; SI-NEXT: .LBB15_7:
1814 ; SI-NEXT: v_mov_b32_e32 v2, -1
1815 ; SI-NEXT: exp mrt0 v2, v2, v0, v0 done vm
1817 ; SI-NEXT: .LBB15_8:
1818 ; SI-NEXT: s_mov_b64 exec, 0
1819 ; SI-NEXT: exp null off, off, off, off done vm
1822 ; GFX10-WAVE64-LABEL: complex_loop:
1823 ; GFX10-WAVE64: ; %bb.0: ; %.entry
1824 ; GFX10-WAVE64-NEXT: s_cmp_lt_i32 s0, 1
1825 ; GFX10-WAVE64-NEXT: s_cbranch_scc1 .LBB15_7
1826 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %.lr.ph
1827 ; GFX10-WAVE64-NEXT: s_mov_b64 s[2:3], exec
1828 ; GFX10-WAVE64-NEXT: s_mov_b32 s6, 0
1829 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], 0
1830 ; GFX10-WAVE64-NEXT: s_branch .LBB15_3
1831 ; GFX10-WAVE64-NEXT: .LBB15_2: ; %latch
1832 ; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
1833 ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
1834 ; GFX10-WAVE64-NEXT: s_add_i32 s6, s6, 1
1835 ; GFX10-WAVE64-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
1836 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, s6
1837 ; GFX10-WAVE64-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
1838 ; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, s[0:1]
1839 ; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_6
1840 ; GFX10-WAVE64-NEXT: .LBB15_3: ; %hdr
1841 ; GFX10-WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1
1842 ; GFX10-WAVE64-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
1843 ; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
1844 ; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
1845 ; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB15_2
1846 ; GFX10-WAVE64-NEXT: ; %bb.4: ; %kill
1847 ; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
1848 ; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
1849 ; GFX10-WAVE64-NEXT: s_cbranch_scc0 .LBB15_8
1850 ; GFX10-WAVE64-NEXT: ; %bb.5: ; %kill
1851 ; GFX10-WAVE64-NEXT: ; in Loop: Header=BB15_3 Depth=1
1852 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1853 ; GFX10-WAVE64-NEXT: s_branch .LBB15_2
1854 ; GFX10-WAVE64-NEXT: .LBB15_6: ; %Flow
1855 ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[0:1]
1856 ; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
1857 ; GFX10-WAVE64-NEXT: s_endpgm
1858 ; GFX10-WAVE64-NEXT: .LBB15_7:
1859 ; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, -1
1860 ; GFX10-WAVE64-NEXT: exp mrt0 v2, v2, v0, v0 done vm
1861 ; GFX10-WAVE64-NEXT: s_endpgm
1862 ; GFX10-WAVE64-NEXT: .LBB15_8:
1863 ; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
1864 ; GFX10-WAVE64-NEXT: exp null off, off, off, off done vm
1865 ; GFX10-WAVE64-NEXT: s_endpgm
1867 ; GFX10-WAVE32-LABEL: complex_loop:
1868 ; GFX10-WAVE32: ; %bb.0: ; %.entry
1869 ; GFX10-WAVE32-NEXT: s_cmp_lt_i32 s0, 1
1870 ; GFX10-WAVE32-NEXT: s_cbranch_scc1 .LBB15_7
1871 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %.lr.ph
1872 ; GFX10-WAVE32-NEXT: s_mov_b32 s1, exec_lo
1873 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
1874 ; GFX10-WAVE32-NEXT: s_mov_b32 s2, 0
1875 ; GFX10-WAVE32-NEXT: s_branch .LBB15_3
1876 ; GFX10-WAVE32-NEXT: .LBB15_2: ; %latch
1877 ; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
1878 ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s3
1879 ; GFX10-WAVE32-NEXT: s_add_i32 s2, s2, 1
1880 ; GFX10-WAVE32-NEXT: v_cmp_ge_i32_e32 vcc_lo, s2, v1
1881 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, s2
1882 ; GFX10-WAVE32-NEXT: s_or_b32 s0, vcc_lo, s0
1883 ; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
1884 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_6
1885 ; GFX10-WAVE32-NEXT: .LBB15_3: ; %hdr
1886 ; GFX10-WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1
1887 ; GFX10-WAVE32-NEXT: v_cmp_gt_u32_e32 vcc_lo, s2, v0
1888 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s3, vcc_lo
1889 ; GFX10-WAVE32-NEXT: s_xor_b32 s3, exec_lo, s3
1890 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB15_2
1891 ; GFX10-WAVE32-NEXT: ; %bb.4: ; %kill
1892 ; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
1893 ; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, exec_lo
1894 ; GFX10-WAVE32-NEXT: s_cbranch_scc0 .LBB15_8
1895 ; GFX10-WAVE32-NEXT: ; %bb.5: ; %kill
1896 ; GFX10-WAVE32-NEXT: ; in Loop: Header=BB15_3 Depth=1
1897 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1898 ; GFX10-WAVE32-NEXT: s_branch .LBB15_2
1899 ; GFX10-WAVE32-NEXT: .LBB15_6: ; %Flow
1900 ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s0
1901 ; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
1902 ; GFX10-WAVE32-NEXT: s_endpgm
1903 ; GFX10-WAVE32-NEXT: .LBB15_7:
1904 ; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, -1
1905 ; GFX10-WAVE32-NEXT: exp mrt0 v2, v2, v0, v0 done vm
1906 ; GFX10-WAVE32-NEXT: s_endpgm
1907 ; GFX10-WAVE32-NEXT: .LBB15_8:
1908 ; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
1909 ; GFX10-WAVE32-NEXT: exp null off, off, off, off done vm
1910 ; GFX10-WAVE32-NEXT: s_endpgm
1912 ; GFX11-LABEL: complex_loop:
1913 ; GFX11: ; %bb.0: ; %.entry
1914 ; GFX11-NEXT: s_cmp_lt_i32 s0, 1
1915 ; GFX11-NEXT: s_cbranch_scc1 .LBB15_7
1916 ; GFX11-NEXT: ; %bb.1: ; %.lr.ph
1917 ; GFX11-NEXT: s_mov_b64 s[2:3], exec
1918 ; GFX11-NEXT: s_mov_b32 s6, 0
1919 ; GFX11-NEXT: s_mov_b64 s[0:1], 0
1920 ; GFX11-NEXT: s_branch .LBB15_3
1921 ; GFX11-NEXT: .LBB15_2: ; %latch
1922 ; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
1923 ; GFX11-NEXT: s_or_b64 exec, exec, s[4:5]
1924 ; GFX11-NEXT: s_add_i32 s6, s6, 1
1925 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1926 ; GFX11-NEXT: v_cmp_ge_i32_e32 vcc, s6, v1
1927 ; GFX11-NEXT: v_mov_b32_e32 v2, s6
1928 ; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
1929 ; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
1930 ; GFX11-NEXT: s_cbranch_execz .LBB15_6
1931 ; GFX11-NEXT: .LBB15_3: ; %hdr
1932 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
1933 ; GFX11-NEXT: s_mov_b64 s[4:5], exec
1934 ; GFX11-NEXT: v_cmpx_gt_u32_e64 s6, v0
1935 ; GFX11-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
1936 ; GFX11-NEXT: s_cbranch_execz .LBB15_2
1937 ; GFX11-NEXT: ; %bb.4: ; %kill
1938 ; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
1939 ; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], exec
1940 ; GFX11-NEXT: s_cbranch_scc0 .LBB15_8
1941 ; GFX11-NEXT: ; %bb.5: ; %kill
1942 ; GFX11-NEXT: ; in Loop: Header=BB15_3 Depth=1
1943 ; GFX11-NEXT: s_mov_b64 exec, 0
1944 ; GFX11-NEXT: s_branch .LBB15_2
1945 ; GFX11-NEXT: .LBB15_6: ; %Flow
1946 ; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
1947 ; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
1948 ; GFX11-NEXT: s_endpgm
1949 ; GFX11-NEXT: .LBB15_7:
1950 ; GFX11-NEXT: v_mov_b32_e32 v2, -1
1951 ; GFX11-NEXT: exp mrt0 v2, v2, v0, v0 done
1952 ; GFX11-NEXT: s_endpgm
1953 ; GFX11-NEXT: .LBB15_8:
1954 ; GFX11-NEXT: s_mov_b64 exec, 0
1955 ; GFX11-NEXT: exp mrt0 off, off, off, off done
1956 ; GFX11-NEXT: s_endpgm
; Enter the loop only when %cmpa is greater than zero (signed compare);
; otherwise go directly to the export block.
1958 %flaga = icmp sgt i32 %cmpa, 0
1959 br i1 %flaga, label %.lr.ph, label %._crit_edge
; Loop counter: 0 when arriving from %.lr.ph, %ctr.next when arriving from %latch.
1965 %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
; Unsigned compare selects between the %kill path and going straight to %latch.
1966 %flagb = icmp ugt i32 %ctr, %cmpb
1967 br i1 %flagb, label %kill, label %latch
; Constant-false condition: the kill is unconditional for the lanes that get here.
1970 call void @llvm.amdgcn.kill(i1 false)
; Advance the counter and keep looping while it is below %cmpc (signed compare).
1974 %ctr.next = add nuw nsw i32 %ctr, 1
1975 %flagc = icmp slt i32 %ctr.next, %cmpc
1976 br i1 %flagc, label %hdr, label %._crit_edge
; -1 when the loop was bypassed from %.entry, otherwise the final %ctr.next.
1979 %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
1980 %out = bitcast i32 %tmp to float
; The value goes to the first two export operands; the last two are undef.
1981 call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
; skip_mode_switch: callable function (the expected code returns with
; s_setpc_b64) whose conditional block %bb.0 contains a write to the MODE
; hardware register through llvm.amdgcn.s.setreg.
;
; The expected assembly for every run line keeps an s_cbranch_execz that
; jumps over the s_setreg_imm32_b32 to the join block, so the register write
; is not reached when no lane takes the %bb.0 path.
1985 define void @skip_mode_switch(i32 %arg) {
1986 ; SI-LABEL: skip_mode_switch:
1987 ; SI: ; %bb.0: ; %entry
1988 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1989 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
1990 ; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
1991 ; SI-NEXT: s_cbranch_execz .LBB16_2
1992 ; SI-NEXT: ; %bb.1: ; %bb.0
1993 ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1994 ; SI-NEXT: .LBB16_2: ; %bb.1
1995 ; SI-NEXT: s_or_b64 exec, exec, s[4:5]
1996 ; SI-NEXT: s_setpc_b64 s[30:31]
1998 ; GFX10-WAVE64-LABEL: skip_mode_switch:
1999 ; GFX10-WAVE64: ; %bb.0: ; %entry
2000 ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2001 ; GFX10-WAVE64-NEXT: s_waitcnt_vscnt null, 0x0
2002 ; GFX10-WAVE64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2003 ; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
2004 ; GFX10-WAVE64-NEXT: s_cbranch_execz .LBB16_2
2005 ; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb.0
2006 ; GFX10-WAVE64-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
2007 ; GFX10-WAVE64-NEXT: .LBB16_2: ; %bb.1
2008 ; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
2009 ; GFX10-WAVE64-NEXT: s_setpc_b64 s[30:31]
2011 ; GFX10-WAVE32-LABEL: skip_mode_switch:
2012 ; GFX10-WAVE32: ; %bb.0: ; %entry
2013 ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2014 ; GFX10-WAVE32-NEXT: s_waitcnt_vscnt null, 0x0
2015 ; GFX10-WAVE32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2016 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s4, vcc_lo
2017 ; GFX10-WAVE32-NEXT: s_cbranch_execz .LBB16_2
2018 ; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb.0
2019 ; GFX10-WAVE32-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
2020 ; GFX10-WAVE32-NEXT: .LBB16_2: ; %bb.1
2021 ; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s4
2022 ; GFX10-WAVE32-NEXT: s_setpc_b64 s[30:31]
2024 ; GFX11-LABEL: skip_mode_switch:
2025 ; GFX11: ; %bb.0: ; %entry
2026 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2027 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2028 ; GFX11-NEXT: s_mov_b64 s[0:1], exec
2029 ; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
2030 ; GFX11-NEXT: s_cbranch_execz .LBB16_2
2031 ; GFX11-NEXT: ; %bb.1: ; %bb.0
2032 ; GFX11-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
2033 ; GFX11-NEXT: .LBB16_2: ; %bb.1
2034 ; GFX11-NEXT: s_or_b64 exec, exec, s[0:1]
2035 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; Only lanes with %arg equal to zero take the %bb.0 path.
2037 %cmp = icmp eq i32 %arg, 0
2038 br i1 %cmp, label %bb.0, label %bb.1
; The i32 2049 operand is printed as hwreg(HW_REG_MODE, 0, 2) in the expected
; assembly above, and 3 is the immediate written to it.
2041 call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
; Declarations of the AMDGPU intrinsics called by the test functions.
; NOTE(review): no call to image.sample.c.1d is visible in this part of the
; file; presumably an earlier test uses it - confirm before removing.
2048 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
2049 declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
2050 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2051 declare void @llvm.amdgcn.kill(i1) #0
2053 declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
; Attribute groups referenced by the #N suffixes on definitions and declarations.
; NOTE(review): #2 is not referenced anywhere in this part of the file - verify
; it is still used elsewhere.
2055 attributes #0 = { nounwind }
2056 attributes #1 = { nounwind readonly }
2057 attributes #2 = { nounwind readnone speculatable }
2058 attributes #3 = { inaccessiblememonly nounwind writeonly }