1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope %s
4 ; Although it's modeled without any control flow in order to get better code
5 ; out of the structurizer, @llvm.amdgcn.kill actually ends the thread that calls
6 ; it with "false". In case it's called in a provably infinite loop, we still
7 ; need to successfully exit and export something, even if we can't know where
8 ; to jump to in the LLVM IR. Therefore we insert a null export ourselves in
9 ; this case right before the s_endpgm to avoid GPU hangs, which is what this
12 ; FIXME: Immediate value 0x41200000 should be folded into the v_cmp instruction.
13 define amdgpu_ps void @return_void(float %0) #0 { ; amdgpu_ps function returning void: %end exports to mrt0, %loop calls kill and loops back on itself
14 ; CHECK-LABEL: return_void:
15 ; CHECK: ; %bb.0: ; %main_body
16 ; CHECK-NEXT: s_mov_b64 s[0:1], exec
17 ; CHECK-NEXT: s_mov_b32 s2, 0x41200000
18 ; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
19 ; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
20 ; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
21 ; CHECK-NEXT: s_cbranch_execz .LBB0_3
22 ; CHECK-NEXT: .LBB0_1: ; %loop
23 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
24 ; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
25 ; CHECK-NEXT: s_cbranch_scc0 .LBB0_6
26 ; CHECK-NEXT: ; %bb.2: ; %loop
27 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
28 ; CHECK-NEXT: s_mov_b64 exec, 0
29 ; CHECK-NEXT: s_mov_b64 vcc, 0
30 ; CHECK-NEXT: s_branch .LBB0_1
31 ; CHECK-NEXT: .LBB0_3: ; %Flow1
32 ; CHECK-NEXT: s_andn2_saveexec_b64 s[0:1], s[2:3]
33 ; CHECK-NEXT: s_cbranch_execz .LBB0_5
34 ; CHECK-NEXT: ; %bb.4: ; %end
35 ; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
36 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
37 ; CHECK-NEXT: exp mrt0 v1, v1, v1, v0 done vm
38 ; CHECK-NEXT: .LBB0_5: ; %UnifiedReturnBlock
39 ; CHECK-NEXT: s_endpgm
40 ; CHECK-NEXT: .LBB0_6:
41 ; CHECK-NEXT: s_mov_b64 exec, 0
42 ; CHECK-NEXT: exp null off, off, off, off done vm
43 ; CHECK-NEXT: s_endpgm
45 %cmp = fcmp olt float %0, 1.000000e+01 ; true when the input is ordered and less than 10.0 (0x41200000 in the expected output)
46 br i1 %cmp, label %end, label %loop ; true -> %end (export path), false -> %loop (kill path)
49 call void @llvm.amdgcn.kill(i1 false) #3 ; constant-false kill; the expected output routes this path to .LBB0_6 (null export, then s_endpgm)
53 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #3 ; expected as "exp mrt0 v1, v1, v1, v0 done vm" with v0 = 1.0 and v1 = 0
57 define amdgpu_ps void @return_void_compr(float %0) #0 { ; same shape as @return_void, but the %end path uses the compressed export intrinsic
58 ; CHECK-LABEL: return_void_compr:
59 ; CHECK: ; %bb.0: ; %main_body
60 ; CHECK-NEXT: s_mov_b64 s[0:1], exec
61 ; CHECK-NEXT: s_mov_b32 s2, 0x41200000
62 ; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
63 ; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
64 ; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
65 ; CHECK-NEXT: s_cbranch_execz .LBB1_3
66 ; CHECK-NEXT: .LBB1_1: ; %loop
67 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
68 ; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
69 ; CHECK-NEXT: s_cbranch_scc0 .LBB1_6
70 ; CHECK-NEXT: ; %bb.2: ; %loop
71 ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
72 ; CHECK-NEXT: s_mov_b64 exec, 0
73 ; CHECK-NEXT: s_mov_b64 vcc, 0
74 ; CHECK-NEXT: s_branch .LBB1_1
75 ; CHECK-NEXT: .LBB1_3: ; %Flow1
76 ; CHECK-NEXT: s_andn2_saveexec_b64 s[0:1], s[2:3]
77 ; CHECK-NEXT: s_cbranch_execz .LBB1_5
78 ; CHECK-NEXT: ; %bb.4: ; %end
79 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
80 ; CHECK-NEXT: exp mrt0 v0, off, v0, off done compr vm
81 ; CHECK-NEXT: .LBB1_5: ; %UnifiedReturnBlock
82 ; CHECK-NEXT: s_endpgm
83 ; CHECK-NEXT: .LBB1_6:
84 ; CHECK-NEXT: s_mov_b64 exec, 0
85 ; CHECK-NEXT: exp null off, off, off, off done vm
86 ; CHECK-NEXT: s_endpgm
88 %cmp = fcmp olt float %0, 1.000000e+01 ; true when the input is ordered and less than 10.0 (0x41200000 in the expected output)
89 br i1 %cmp, label %end, label %loop ; true -> %end (export path), false -> %loop (kill path)
92 call void @llvm.amdgcn.kill(i1 false) #3 ; constant-false kill; the expected output routes this path to .LBB1_6 (null export, then s_endpgm)
96 call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #3 ; expected as "exp mrt0 v0, off, v0, off done compr vm" with v0 = 0
100 ; Test the case where there's only a kill in an infinite loop.
101 define amdgpu_ps void @only_kill() #0 { ; no parameters and no export call: the only visible call in the body is the kill below
102 ; CHECK-LABEL: only_kill:
103 ; CHECK: ; %bb.0: ; %main_body
104 ; CHECK-NEXT: s_mov_b64 s[0:1], exec
105 ; CHECK-NEXT: .LBB2_1: ; %loop
106 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
107 ; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
108 ; CHECK-NEXT: s_cbranch_scc0 .LBB2_4
109 ; CHECK-NEXT: ; %bb.2: ; %loop
110 ; CHECK-NEXT: ; in Loop: Header=BB2_1 Depth=1
111 ; CHECK-NEXT: s_mov_b64 exec, 0
112 ; CHECK-NEXT: s_mov_b64 vcc, exec
113 ; CHECK-NEXT: s_cbranch_execnz .LBB2_1
114 ; CHECK-NEXT: ; %bb.3: ; %DummyReturnBlock
115 ; CHECK-NEXT: s_endpgm
116 ; CHECK-NEXT: .LBB2_4:
117 ; CHECK-NEXT: s_mov_b64 exec, 0
118 ; CHECK-NEXT: exp null off, off, off, off done vm
119 ; CHECK-NEXT: s_endpgm
124 call void @llvm.amdgcn.kill(i1 false) #3 ; constant-false kill; the expected output still ends in a null export block (.LBB2_4) followed by s_endpgm
128 ; Check that the epilog is the final block
129 define amdgpu_ps float @return_nonvoid(float %0) #0 { ; variant that returns a float instead of void; same compare/branch/kill shape as @return_void
130 ; CHECK-LABEL: return_nonvoid:
131 ; CHECK: ; %bb.0: ; %main_body
132 ; CHECK-NEXT: s_mov_b64 s[0:1], exec
133 ; CHECK-NEXT: s_mov_b32 s2, 0x41200000
134 ; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
135 ; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
136 ; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
137 ; CHECK-NEXT: s_cbranch_execz .LBB3_3
138 ; CHECK-NEXT: .LBB3_1: ; %loop
139 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
140 ; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
141 ; CHECK-NEXT: s_cbranch_scc0 .LBB3_4
142 ; CHECK-NEXT: ; %bb.2: ; %loop
143 ; CHECK-NEXT: ; in Loop: Header=BB3_1 Depth=1
144 ; CHECK-NEXT: s_mov_b64 exec, 0
145 ; CHECK-NEXT: s_mov_b64 vcc, exec
146 ; CHECK-NEXT: s_cbranch_execnz .LBB3_1
147 ; CHECK-NEXT: .LBB3_3: ; %Flow1
148 ; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
149 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
150 ; CHECK-NEXT: s_branch .LBB3_5
151 ; CHECK-NEXT: .LBB3_4:
152 ; CHECK-NEXT: s_mov_b64 exec, 0
153 ; CHECK-NEXT: exp null off, off, off, off done vm
154 ; CHECK-NEXT: s_endpgm
155 ; CHECK-NEXT: .LBB3_5:
157 %cmp = fcmp olt float %0, 1.000000e+01 ; true when the input is ordered and less than 10.0 (0x41200000 in the expected output)
158 br i1 %cmp, label %end, label %loop ; true -> %end, false -> %loop (kill path)
161 call void @llvm.amdgcn.kill(i1 false) #3 ; constant-false kill; the expected output places the null-export block (.LBB3_4) before the final label .LBB3_5
168 declare void @llvm.amdgcn.kill(i1) #0 ; every visible call in this file passes a constant i1 false
169 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0 ; called from @return_void
170 declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0 ; called from @return_void_compr
172 attributes #0 = { nounwind } ; NOTE(review): call sites reference #3, which is not defined in the visible lines - confirm this is intended