; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break
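; As the OPT checks below show, the annotation feeds the divergent break
; condition into llvm.amdgcn.if.break to accumulate the lanes that want to
; leave the loop, llvm.amdgcn.loop removes those lanes from exec and returns
; true once no active lanes remain, and llvm.amdgcn.end.cf re-enables them at
; the loop exit.
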
; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: call i64 @llvm.amdgcn.if.break.i64.i64(
; OPT: call i1 @llvm.amdgcn.loop.i64(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: call void @llvm.amdgcn.end.cf.i64(i64

; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_add_i32 s4, s4, 1
; GCN: s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
; GCN: s_cmp_gt_i32 s4, -1
; GCN: s_cbranch_scc1 [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc
; GCN: s_andn2_b64 [[INNER_MASK]], [[INNER_MASK]], exec
; GCN: s_and_b64 [[BROKEN_MASK:s\[[0-9]+:[0-9]+\]]], vcc, exec
; GCN: s_or_b64 [[INNER_MASK]], [[INNER_MASK]], [[BROKEN_MASK]]

; GCN: [[FLOW]]: ; %Flow
; GCN: ; in Loop: Header=BB0_1 Depth=1
; GCN: s_and_b64 [[BROKEN_MASK]], exec, [[INNER_MASK]]
; GCN: s_or_b64 [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]]
; GCN: s_mov_b64 [[ACCUM_MASK]], [[BROKEN_MASK]]
; GCN: s_andn2_b64 exec, exec, [[BROKEN_MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9
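
; In the ISA the break mask is accumulated with s_or_b64, broken lanes are
; removed from exec with s_andn2_b64, and s_cbranch_execnz keeps looping
; while any lane is still active.
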
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9: ; preds = %bb4, %bb1
  ret void
}

; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef
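; The break phi below therefore compares @lds against
; inttoptr (i32 4 to i32 addrspace(3)*) rather than against null, so the
; icmp survives as a ConstantExpr.
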
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is the
; loop-continue condition; the break condition then has to be inverted
; before it feeds llvm.amdgcn.if.break.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }