; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break
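;
; A minimal sketch (comments only, not checked by FileCheck) of the
; pattern si-annotate-control-flow is expected to emit for a divergent
; loop exit, per the OPT checks below; the value and label names here
; are illustrative:
;   %broken = call i64 @llvm.amdgcn.if.break.i64(i1 %break.cond, i64 %phi.broken)
;   %exit = call i1 @llvm.amdgcn.loop.i64(i64 %broken)
;   br i1 %exit, label %exit.bb, label %header
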
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[TMP0:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[TMP0]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: ret void
;
; GCN-LABEL: break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB0_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc0 .LBB0_3
; GCN-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: s_branch .LBB0_4
; GCN-NEXT: .LBB0_3: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB0_4: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB0_1
; GCN-NEXT: ; %bb.5: ; %bb9
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9: ; preds = %bb4, %bb1
  ret void
}

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB1_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB1_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB1_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB1_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef
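;
; The break condition below is fed by the constant expression
;   icmp ne (ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), ptr addrspace(3) @lds)
; rather than a compare against null, since (per the FIXME above) the
; null compare would presumably constant-fold away before the annotator
; ever sees it.
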
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), ptr addrspace(3) @lds), [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB2_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB2_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB2_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB2_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (ptr addrspace(3) inttoptr (i32 4 to ptr addrspace(3)), ptr addrspace(3) @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB3_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB3_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB3_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB3_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB4_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB4_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB4_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB4_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

; Swap the order of the branches in the flow block so that the true phi
; is the loop-continue condition.
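;
; With continue on the true edge, the annotator has to invert the phi
; before feeding it to the break intrinsic; the OPT checks below expect
; roughly:
;   %inv = xor i1 %my.tmp3, true
;   %broken = call i64 @llvm.amdgcn.if.break.i64(i1 %inv, i64 %phi.broken)
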
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(1) undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[MY_TMP3_INV:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3_INV]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) undef, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: .LBB5_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 .LBB5_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: .LBB5_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz .LBB5_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, ptr addrspace(1) undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9: ; preds = %Flow
  store volatile i32 7, ptr addrspace(3) undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }