; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break
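;
; In each test, si-annotate-control-flow rewrites the divergent loop exit
; into the if.break/loop/end.cf intrinsic pattern checked below. A rough
; sketch of the transformed loop shape (block and value names here are
; illustrative, not the exact output):
;
;   loop:
;     %phi.broken = phi i64 [ %broken, %flow ], [ 0, %entry ]
;     ...
;   flow:
;     ; OR lanes that want to exit into the accumulated break mask
;     %broken = call i64 @llvm.amdgcn.if.break.i64(i1 %exit.cond, i64 %phi.broken)
;     ; returns true once every active lane has broken out of the loop
;     %done = call i1 @llvm.amdgcn.loop.i64(i64 %broken)
;     br i1 %done, label %exit, label %loop
;   exit:
;     ; re-enable the lanes that exited on earlier iterations
;     call void @llvm.amdgcn.end.cf.i64(i64 %broken)
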
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT: ret void
;
; GCN-LABEL: break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB0_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB0_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB0_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB0_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9: ; preds = %bb4, %bb1
  ret void
}

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB1_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB1_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB1_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB1_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
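; A null compare such as
;   icmp ne (i32 addrspace(3)* @lds, i32 addrspace(3)* null)
; would presumably constant-fold before reaching the annotator, so the test
; below compares against a non-null constant address (inttoptr of 4) instead.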
@lds = addrspace(3) global i32 undef

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB2_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB2_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB2_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB2_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB3_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB3_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB3_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB3_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB4_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB4_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB4_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB4_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.
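;
; Because the i1 phi now means "continue" rather than "break", the annotator
; has to invert it before feeding it to if.break; this shows up as the
; xor with true in the OPT checks and the s_xor_b64 in the GCN output below.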
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT: bb:
; OPT-NEXT: [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT: [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: bb1:
; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT: [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT: [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT: bb4:
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT: br label [[FLOW]]
; OPT: Flow:
; OPT-NEXT: [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT: [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT: [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT: br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s3, s[0:1], 0x9
; GCN-NEXT: s_mov_b64 s[0:1], 0
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr6
; GCN-NEXT: BB5_1: ; %bb1
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cmp_gt_i32 s6, -1
; GCN-NEXT: s_cbranch_scc1 BB5_3
; GCN-NEXT: ; %bb.2: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT: BB5_3: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT: s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT: s_cbranch_execnz BB5_1
; GCN-NEXT: ; %bb.4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, 7
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_write_b32 v0, v0
; GCN-NEXT: s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }