; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: call i64 @llvm.amdgcn.if.break.i64.i64(
; OPT: call i1 @llvm.amdgcn.loop.i64(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: call void @llvm.amdgcn.end.cf.i64(i64

; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[OUTER_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
; GCN: s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
; GCN: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
; GCN: s_andn2_b64 [[INNER_MASK]], [[INNER_MASK]], exec
; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], vcc, exec
; GCN: s_or_b64 [[INNER_MASK]], [[INNER_MASK]], [[TMP0]]

; GCN: [[FLOW]]: ; %Flow
; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[OUTER_MASK]]
; GCN: s_mov_b64 [[OUTER_MASK]], [[TMP1]]
; GCN: s_andn2_b64 exec, exec, [[TMP1]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:                                              ; preds = %bb4, %bb1
  ret void
}

; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                       ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                       ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                       ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                       ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of branches in the flow block so that the true phi value
; means continue the loop, forcing the break condition to be inverted.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:                                       ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }