; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
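
; The inner and the outer if end at the same point, so the checks expect a
; single shared endif block and no separate exec restore for the inner if.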
; GCN-LABEL: {{^}}simple_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN: s_and_b64 exec, exec, vcc
; GCN-NEXT: ; mask branch [[ENDIF]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN-NEXT: {{^}}[[ENDIF]]:
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  ret void
}
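
; Here bb.inner.end has work of its own, so the inner and outer regions end at
; different blocks and both exec restores are expected to remain.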
; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  ret void
}
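
; An if/else nested inside an if: the inner else needs the usual
; s_or_saveexec + s_xor exec manipulation, and both inner paths rejoin at the
; single outer endif.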
; GCN-LABEL: {{^}}nested_if_if_else:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN-NEXT: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                          ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                          ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb, %bb.then, %bb.else
  ret void
}
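
; An inner if inside each arm of an outer if/else: the inner if on the else
; side restores exec before the outer then runs, while the inner if on the
; then side joins the outer endif directly.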
; GCN-LABEL: {{^}}nested_if_else_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:                                    ; preds = %bb
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:                                   ; preds = %bb.outer.else
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.then, %bb.outer.then, %bb.inner.then2, %bb.outer.else
  ret void
}
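
; The barrier in bb.end means the final exec restore cannot simply be dropped,
; so the checks still expect the s_or_b64 restore before it.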
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; Make sure scc liveness is updated if s_or_b64 is removed
; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
; GCN: s_and_b64 exec, exec, {{vcc|s\[[0-9:]+\]}}

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN-NEXT: s_cbranch_execnz

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword

define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }