; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break
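;
; In each loop below, SIAnnotateControlFlow threads the break condition
; through llvm.amdgcn.if.break to accumulate a lane mask of lanes that
; have left the loop, tests that mask with llvm.amdgcn.loop on the
; backedge, and restores exec with llvm.amdgcn.end.cf in the exit block
; (see the OPT checks).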

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[TMP0:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP0]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc0 .LBB0_3
; GCN-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:    s_branch .LBB0_4
; GCN-NEXT:  .LBB0_3: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB0_4: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB0_1
; GCN-NEXT:  ; %bb.5: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:                                              ; preds = %bb4, %bb1
  ret void
}
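
; Same loop, but the i1 break value reaching the Flow block from %bb1
; is undef rather than true, so the loop header updates the break mask
; with s_andn2 instead of s_or with exec.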
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef
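
; The break value coming from %bb1 is the ConstantExpr compare of the
; inttoptr constant against @lds, a uniform value; the generated code
; matches the constant-true variant below.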
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    [[CMP2:%.*]] = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), @lds
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ [[CMP2]], [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  %cmp2 = icmp ne ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), @lds
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ %cmp2, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}
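
; Break value from %bb1 is constant true: every lane that takes the
; %bb1 -> %Flow edge leaves the loop, and only lanes reaching %bb4 can
; continue.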
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}
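
; Break value from %bb1 is constant false: lanes taking the
; %bb1 -> %Flow edge always stay in the loop, so the header clears
; those lanes from the break mask with s_andn2.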
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3_INV:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3_INV]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }