; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break
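; The OPT run checks that, after StructurizeCFG, SIAnnotateControlFlow rewrites
; the divergent loop exit using the llvm.amdgcn.break / llvm.amdgcn.if.break /
; llvm.amdgcn.loop / llvm.amdgcn.end.cf intrinsics. The GCN run checks the
; resulting exec-mask update loop in the generated code.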
; OPT-LABEL: @break_loop(
; OPT: call i64 @llvm.amdgcn.break(i64
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: call i64 @llvm.amdgcn.if.break(

; OPT: call i1 @llvm.amdgcn.loop(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: call void @llvm.amdgcn.end.cf(i64

; TODO: Can remove exec fixes in return block
; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[INITMASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]]
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]]

; GCN: s_mov_b64 [[INITMASK]], [[MASK]]
; GCN: s_andn2_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:                                              ; preds = %bb4, %bb1
  ret void
}

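; Same loop, but the exit goes through a Flow block whose exit-condition phi
; has an undef incoming value from bb1.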
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

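; Same loop, but the exit condition phi takes a constant true from bb1.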
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

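; Same loop with a constant false incoming value from bb1, so the bb1 path
; never breaks out of the loop and needs no break intrinsic.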
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; the continue rather than the break.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }