; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

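; Kill with a non-negative immediate can never kill any lanes, so no exec
; update or skip branch is expected.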
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
  call void @llvm.AMDGPU.kill(float 0.0)
  ret void
}

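; Kill with a negative immediate kills all lanes unconditionally and is
; lowered to clearing the exec mask.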
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  ret void
}

; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  call void @llvm.AMDGPU.kill(float -1.0)
  ret void
}

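; Kill of a variable value is lowered to v_cmpx_le_f32, which writes the
; result of 0 <= x straight into exec, disabling the lanes where the operand
; is negative.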
; CHECK-LABEL: {{^}}test_kill_depth_var:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

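; Two variable kills separated by another instruction (inline asm defining v7).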
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

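; With enough instructions after the kill, a skip branch is inserted: if exec
; becomes zero, jump to a block that performs a null export and ends the
; program.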
; FIXME: why does the skip depend on the asm length in the same block?
; CHECK-LABEL: {{^}}test_kill_control_flow:
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  call void @llvm.AMDGPU.kill(float %var)
  br label %exit

exit:
  ret void
}

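; Same as above, but with more code after the kill in the same block: the
; value live across the kill (v8) and the value defined after it (v9) must
; still reach their stores on the surviving path.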
; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; BB#1: ; %bb
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_mov_b32_e64 v8, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK: buffer_store_dword v8
; CHECK: v_mov_b32_e64 v9, -2
; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
; CHECK: buffer_store_dword v9
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
  call void @llvm.AMDGPU.kill(float %var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}

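; Kill inside a divergent loop: the loop runs under a saveexec mask, the kill
; gets an early-exit check inside the loop body, and exec is restored with
; s_or_b64 in the exit block.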
; CHECK-LABEL: {{^}}test_kill_divergent_loop:
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: s_cbranch_execz [[EXIT]]
; CHECK: {{BB[0-9]+_[0-9]+}}: ; %bb.preheader
; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: ; BB#3:
; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
; CHECK: v_cmp_eq_u32_e32 vcc, 0, [[LOAD]]
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]
; CHECK-NEXT: {{^}}[[EXIT]]:
; CHECK: s_or_b64 exec, exec, [[SAVEEXEC]]
; CHECK: buffer_store_dword
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  call void @llvm.AMDGPU.kill(float %var)
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %loop.cond = icmp eq i32 %vgpr, 0
  br i1 %loop.cond, label %bb, label %exit

exit:
  store volatile i32 8, i32 addrspace(1)* undef
  ret void
}

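; The value feeding the phi is defined before the kill, so the register
; holding it must survive the kill and its skip branch for the later compare
; against 0.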
; CHECK-LABEL: {{^}}phi_use_def_before_kill:
; CHECK: v_cndmask_b32_e64 [[PHIREG:v[0-9]+]], 0, -1.0,
; CHECK: v_cmpx_le_f32_e32 vcc, 0,
; CHECK-NEXT: s_cbranch_execnz [[BB4:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: s_endpgm
; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]]
; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
; CHECK: buffer_store_dword
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @phi_use_def_before_kill() #0 {
bb:
  %tmp = fadd float undef, 1.000000e+00
  %tmp1 = fcmp olt float 0.000000e+00, %tmp
  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
  call void @llvm.AMDGPU.kill(float %tmp2)
  br i1 undef, label %phibb, label %bb8

phibb:
  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
  br i1 %tmp6, label %bb10, label %end

bb8:
  store volatile i32 8, i32 addrspace(1)* undef
  br label %phibb

bb10:
  store volatile i32 9, i32 addrspace(1)* undef
  br label %end

end:
  ret void
}

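; The block containing the kill has no successors, so there is nothing to
; skip over and no skip branch should be emitted after exec is cleared.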
; CHECK-LABEL: {{^}}no_skip_no_successors:
; CHECK: v_cmp_nge_f32
; CHECK-NEXT: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]
; CHECK: s_mov_b64 exec, 0
; CHECK: [[SKIPKILL]]:
; CHECK: v_cmp_nge_f32_e32 vcc
; CHECK-NEXT: BB#3: ; %bb5
; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
  br i1 %tmp, label %bb6, label %bb3

bb3: ; preds = %bb
  br i1 %tmp2, label %bb5, label %bb4

bb4: ; preds = %bb3
  br i1 true, label %bb5, label %bb7

bb5: ; preds = %bb4, %bb3
  unreachable

bb6: ; preds = %bb
  call void @llvm.AMDGPU.kill(float -1.000000e+00)
  unreachable

bb7: ; preds = %bb4
  ret void
}

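; A divergent if after the kill block: exec is restored (s_or_b64) before the
; image sample in the join block, and the later if gets its own saveexec and
; mask branch.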
; CHECK-LABEL: {{^}}if_after_kill_block:
; CHECK: s_and_saveexec_b64
; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]
; CHECK: v_cmpx_le_f32_e32 vcc, 0,
; CHECK: s_or_b64 exec, exec
; CHECK: image_sample_c
; CHECK: v_cmp_neq_f32_e32 vcc, 0,
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
; CHECK: BB{{[0-9]+_[0-9]+}}: ; %bb8
; CHECK: buffer_store_dword
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 {
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  br i1 %tmp, label %bb3, label %bb4

bb3: ; preds = %bb
  call void @llvm.AMDGPU.kill(float %arg)
  br label %bb4

bb4: ; preds = %bb3, %bb
  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
  %tmp6 = extractelement <4 x float> %tmp5, i32 0
  %tmp7 = fcmp une float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb8, label %bb9

bb8: ; preds = %bb9, %bb4
  store volatile i32 9, i32 addrspace(1)* undef
  br label %bb9

bb9: ; preds = %bb8, %bb4
  ret void
}

declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare void @llvm.AMDGPU.kill(float) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }