; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -structurizecfg-skip-uniform-regions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -structurizecfg-skip-uniform-regions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

define amdgpu_kernel void @uniform_if_scc(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[2:3], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_eq_u32 s0, 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_cbranch_scc1 .LBB0_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB0_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_scc:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[2:3], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s0, 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB0_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB0_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_if_vcc(float %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_vcc:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s1, s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_f32_e64 s[4:5], s1, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; SI-NEXT:    s_cbranch_vccnz .LBB1_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB1_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_vcc:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s1, s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_eq_f32_e64 s[4:5], s1, 0
; VI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; VI-NEXT:    s_cbranch_vccnz .LBB1_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB1_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_if_swap_br_targets_scc(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_swap_br_targets_scc:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[2:3], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_cbranch_scc1 .LBB2_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB2_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_swap_br_targets_scc:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[2:3], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s0, 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB2_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB2_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_if_swap_br_targets_vcc(float %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_swap_br_targets_vcc:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s1, s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_neq_f32_e64 s[4:5], s1, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; SI-NEXT:    s_cbranch_vccnz .LBB3_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB3_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_swap_br_targets_vcc:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s1, s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_neq_f32_e64 s[4:5], s1, 0
; VI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; VI-NEXT:    s_cbranch_vccnz .LBB3_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB3_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
define amdgpu_kernel void @uniform_if_move_valu(ptr addrspace(1) %out, float %a) {
; SI-LABEL: uniform_if_move_valu:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[2:3], 0xb
; SI-NEXT:    v_mov_b32_e32 v0, 0x41200000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e32 v0, s0, v0
; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 5, v0
; SI-NEXT:    s_cbranch_vccnz .LBB4_2
; SI-NEXT:  ; %bb.1: ; %if
; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:  .LBB4_2: ; %endif
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_move_valu:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[2:3], 0x2c
; VI-NEXT:    v_mov_b32_e32 v0, 0x41200000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e32 v0, s0, v0
; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 5, v0
; VI-NEXT:    s_cbranch_vccnz .LBB4_2
; VI-NEXT:  ; %bb.1: ; %if
; VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:  .LBB4_2: ; %endif
; VI-NEXT:    s_endpgm
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
define amdgpu_kernel void @uniform_if_move_valu_commute(ptr addrspace(1) %out, float %a) {
; SI-LABEL: uniform_if_move_valu_commute:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[2:3], 0xb
; SI-NEXT:    v_mov_b32_e32 v0, 0x41200000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e32 v0, s0, v0
; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 6, v0
; SI-NEXT:    s_cbranch_vccnz .LBB5_2
; SI-NEXT:  ; %bb.1: ; %if
; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:  .LBB5_2: ; %endif
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_move_valu_commute:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[2:3], 0x2c
; VI-NEXT:    v_mov_b32_e32 v0, 0x41200000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e32 v0, s0, v0
; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 6, v0
; VI-NEXT:    s_cbranch_vccnz .LBB5_2
; VI-NEXT:  ; %bb.1: ; %if
; VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:  .LBB5_2: ; %endif
; VI-NEXT:    s_endpgm
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

define amdgpu_kernel void @uniform_if_else_ret(ptr addrspace(1) nocapture %out, i32 %a) {
; SI-LABEL: uniform_if_else_ret:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s4, 0
; SI-NEXT:    s_cbranch_scc0 .LBB6_2
; SI-NEXT:  ; %bb.1: ; %if.else
; SI-NEXT:    v_mov_b32_e32 v0, 2
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB6_2: ; %if.then
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_else_ret:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s4, s[2:3], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s4, 0
; VI-NEXT:    s_cbranch_scc0 .LBB6_2
; VI-NEXT:  ; %bb.1: ; %if.else
; VI-NEXT:    v_mov_b32_e32 v0, 2
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB6_2: ; %if.then
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, ptr addrspace(1) %out
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, ptr addrspace(1) %out
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ret void
}

define amdgpu_kernel void @uniform_if_else(ptr addrspace(1) nocapture %out0, ptr addrspace(1) nocapture %out1, i32 %a) {
; SI-LABEL: uniform_if_else:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s4, s[2:3], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s4, 0
; SI-NEXT:    s_cbranch_scc0 .LBB7_2
; SI-NEXT:  ; %bb.1: ; %if.else
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, 2
; SI-NEXT:    s_branch .LBB7_3
; SI-NEXT:  .LBB7_2: ; %if.then
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:  .LBB7_3: ; %if.end
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    s_waitcnt expcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, 3
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_else:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s4, s[2:3], 0x34
; VI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s4, 0
; VI-NEXT:    s_cbranch_scc0 .LBB7_2
; VI-NEXT:  ; %bb.1: ; %if.else
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_mov_b32_e32 v0, 2
; VI-NEXT:    s_branch .LBB7_3
; VI-NEXT:  .LBB7_2: ; %if.then
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:  .LBB7_3: ; %if.end
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    v_mov_b32_e32 v0, 3
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, ptr addrspace(1) %out0
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, ptr addrspace(1) %out0
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  store i32 3, ptr addrspace(1) %out1
  ret void
}

define amdgpu_kernel void @icmp_2_users(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: icmp_2_users:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_gt_i32 s4, 0
; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; SI-NEXT:    s_cmp_lt_i32 s4, 1
; SI-NEXT:    s_cbranch_scc1 .LBB8_2
; SI-NEXT:  ; %bb.1: ; %IF
; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[0:1]
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:  .LBB8_2: ; %ENDIF
; SI-NEXT:    s_endpgm
;
; VI-LABEL: icmp_2_users:
; VI:       ; %bb.0: ; %main_body
; VI-NEXT:    s_load_dword s4, s[2:3], 0x2c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_gt_i32 s4, 0
; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; VI-NEXT:    s_cmp_lt_i32 s4, 1
; VI-NEXT:    s_cbranch_scc1 .LBB8_2
; VI-NEXT:  ; %bb.1: ; %IF
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[0:1]
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:  .LBB8_2: ; %ENDIF
; VI-NEXT:    s_endpgm
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:
  store i32 %1, ptr addrspace(1) %out
  br label %ENDIF

ENDIF:                                            ; preds = %IF, %main_body
  ret void
}

define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, ptr addrspace(1) %out) {
; SI-LABEL: icmp_users_different_blocks:
; SI:       ; %bb.0: ; %bb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lt_i32 s0, 1
; SI-NEXT:    s_cbranch_scc1 .LBB9_2
; SI-NEXT:  ; %bb.1: ; %bb2
; SI-NEXT:    s_cmp_gt_i32 s1, 0
; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[0:1]
; SI-NEXT:    s_cbranch_vccz .LBB9_3
; SI-NEXT:  .LBB9_2: ; %bb9
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB9_3: ; %bb7
; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0xb
; SI-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: icmp_users_different_blocks:
; VI:       ; %bb.0: ; %bb
; VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lt_i32 s0, 1
; VI-NEXT:    s_cbranch_scc1 .LBB9_2
; VI-NEXT:  ; %bb.1: ; %bb2
; VI-NEXT:    s_cmp_gt_i32 s1, 0
; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; VI-NEXT:    s_and_b64 vcc, exec, s[0:1]
; VI-NEXT:    s_cbranch_vccz .LBB9_3
; VI-NEXT:  .LBB9_2: ; %bb9
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB9_3: ; %bb7
; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x2c
; VI-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %cmp1 = icmp sgt i32 %cond1, 0
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, ptr addrspace(1) %out
  br label %bb9

bb9:                                              ; preds = %bb8, %bb4
  ret void
}

define amdgpu_kernel void @uniform_loop(ptr addrspace(1) %out, i32 %a) {
; SI-LABEL: uniform_loop:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[2:3], 0xb
; SI-NEXT:  .LBB10_1: ; %loop
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_i32 s0, s0, -1
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_cbranch_scc1 .LBB10_1
; SI-NEXT:  ; %bb.2: ; %done
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_loop:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[2:3], 0x2c
; VI-NEXT:  .LBB10_1: ; %loop
; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_add_i32 s0, s0, -1
; VI-NEXT:    s_cmp_lg_u32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB10_1
; VI-NEXT:  ; %bb.2: ; %done
; VI-NEXT:    s_endpgm
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}

; Test uniform and divergent.
define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: uniform_inside_divergent:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT:    s_cbranch_execz .LBB11_2
; SI-NEXT:  ; %bb.1: ; %if
; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s4, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_cbranch_scc0 .LBB11_3
; SI-NEXT:  .LBB11_2: ; %endif
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB11_3: ; %if_uniform
; SI-NEXT:    s_waitcnt expcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_inside_divergent:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
; VI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
; VI-NEXT:    s_cbranch_execz .LBB11_2
; VI-NEXT:  ; %bb.1: ; %if
; VI-NEXT:    s_load_dword s4, s[2:3], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s4, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_cbranch_scc0 .LBB11_3
; VI-NEXT:  .LBB11_2: ; %endif
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB11_3: ; %if_uniform
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

define amdgpu_kernel void @divergent_inside_uniform(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: divergent_inside_uniform:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[2:3], 0xb
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_cbranch_scc0 .LBB12_2
; SI-NEXT:  .LBB12_1: ; %endif
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB12_2: ; %if
; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT:    s_cbranch_execz .LBB12_1
; SI-NEXT:  ; %bb.3: ; %if_uniform
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: divergent_inside_uniform:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[2:3], 0x2c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s0, 0
; VI-NEXT:    s_cbranch_scc0 .LBB12_2
; VI-NEXT:  .LBB12_1: ; %endif
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB12_2: ; %if
; VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v1, 0
; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT:    s_cbranch_execz .LBB12_1
; VI-NEXT:  ; %bb.3: ; %if_uniform
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

define amdgpu_kernel void @divergent_if_uniform_if(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: divergent_if_uniform_if:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x9
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT:    s_cbranch_execz .LBB13_2
; SI-NEXT:  ; %bb.1: ; %if
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:  .LBB13_2: ; %endif
; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
; SI-NEXT:    s_load_dword s0, s[2:3], 0xb
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_cbranch_scc0 .LBB13_4
; SI-NEXT:  ; %bb.3: ; %exit
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB13_4: ; %if_uniform
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt expcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, 2
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: divergent_if_uniform_if:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x24
; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
; VI-NEXT:    s_cbranch_execz .LBB13_2
; VI-NEXT:  ; %bb.1: ; %if
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:  .LBB13_2: ; %endif
; VI-NEXT:    s_or_b64 exec, exec, s[0:1]
; VI-NEXT:    s_load_dword s0, s[2:3], 0x2c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s0, 0
; VI-NEXT:    s_cbranch_scc0 .LBB13_4
; VI-NEXT:  ; %bb.3: ; %exit
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB13_4: ; %if_uniform
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, 2
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:                                            ; preds = %if, %entry
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, ptr addrspace(1) %out
  br label %exit

exit:
  ret void
}

; The conditions of the branches in the two blocks are
; uniform. MachineCSE replaces the 2nd condition with the inverse of
; the first, leaving an scc use in a different block than it was
; defined.
define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: cse_uniform_condition_different_blocks:
; SI:       ; %bb.0: ; %bb
; SI-NEXT:    s_load_dword s0, s[2:3], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lt_i32 s0, 1
; SI-NEXT:    s_cbranch_scc1 .LBB14_2
; SI-NEXT:  ; %bb.1: ; %bb2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:  .LBB14_2: ; %bb9
; SI-NEXT:    s_endpgm
;
; VI-LABEL: cse_uniform_condition_different_blocks:
; VI:       ; %bb.0: ; %bb
; VI-NEXT:    s_load_dword s0, s[2:3], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lt_i32 s0, 1
; VI-NEXT:    s_cbranch_scc1 .LBB14_2
; VI-NEXT:  ; %bb.1: ; %bb2
; VI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x2c
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v1, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:  .LBB14_2: ; %bb9
; VI-NEXT:    s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %tmp3 = load volatile i32, ptr addrspace(1) undef
  store volatile i32 0, ptr addrspace(1) undef
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, ptr addrspace(1) %out
  br label %bb9

bb9:                                              ; preds = %bb8, %bb4
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_eq(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_eq:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[0:1], 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; SI-NEXT:    s_cbranch_vccnz .LBB15_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB15_2: ; %done
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_eq:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u64 s[0:1], 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB15_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB15_2: ; %done
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_ne(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_ne:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_ne_u64_e64 s[4:5], s[0:1], 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; SI-NEXT:    s_cbranch_vccnz .LBB16_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB16_2: ; %done
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_ne:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u64 s[0:1], 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB16_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB16_2: ; %done
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_sgt:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_gt_i64_e64 s[4:5], s[0:1], 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; SI-NEXT:    s_cbranch_vccnz .LBB17_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB17_2: ; %done
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_sgt:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_gt_i64_e64 s[4:5], s[0:1], 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; VI-NEXT:    s_cbranch_vccnz .LBB17_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB17_2: ; %done
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp sgt i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @move_to_valu_i64_eq(ptr addrspace(1) %out) {
; SI-LABEL: move_to_valu_i64_eq:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    ds_read_b64 v[0:1], v0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; SI-NEXT:    s_cbranch_vccnz .LBB18_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB18_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: move_to_valu_i64_eq:
; VI:       ; %bb.0:
; VI-NEXT:    s_mov_b32 m0, -1
; VI-NEXT:    ds_read_b64 v[0:1], v0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; VI-NEXT:    s_cbranch_vccnz .LBB18_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB18_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %cond = load volatile i64, ptr addrspace(3) undef
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @move_to_valu_i64_ne(ptr addrspace(1) %out) {
; SI-LABEL: move_to_valu_i64_ne:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    ds_read_b64 v[0:1], v0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; SI-NEXT:    s_cbranch_vccnz .LBB19_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB19_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: move_to_valu_i64_ne:
; VI:       ; %bb.0:
; VI-NEXT:    s_mov_b32 m0, -1
; VI-NEXT:    ds_read_b64 v[0:1], v0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; VI-NEXT:    s_cbranch_vccnz .LBB19_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB19_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %cond = load volatile i64, ptr addrspace(3) undef
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
; SI-LABEL: move_to_valu_vgpr_operand_phi:
; SI:       ; %bb.0: ; %bb0
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 28, v0
; SI-NEXT:    v_mov_b32_e32 v1, 1
; SI-NEXT:    s_and_b64 vcc, exec, 0
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_branch .LBB20_2
; SI-NEXT:  .LBB20_1: ; %bb3
; SI-NEXT:    ; in Loop: Header=BB20_2 Depth=1
; SI-NEXT:    v_add_i32_e64 v0, s[4:5], 8, v0
; SI-NEXT:  .LBB20_2: ; %bb1
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_cbranch_scc1 .LBB20_1
; SI-NEXT:  ; %bb.3: ; %bb2
; SI-NEXT:    ; in Loop: Header=BB20_2 Depth=1
; SI-NEXT:    ds_write_b32 v0, v1
; SI-NEXT:    s_mov_b64 vcc, vcc
; SI-NEXT:    s_cbranch_vccz .LBB20_1
; SI-NEXT:  ; %bb.4: ; %DummyReturnBlock
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: move_to_valu_vgpr_operand_phi:
; VI:       ; %bb.0: ; %bb0
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 28, v0
; VI-NEXT:    v_mov_b32_e32 v1, 1
; VI-NEXT:    s_and_b64 vcc, exec, 0
; VI-NEXT:    s_mov_b32 m0, -1
; VI-NEXT:    s_branch .LBB20_2
; VI-NEXT:  .LBB20_1: ; %bb3
; VI-NEXT:    ; in Loop: Header=BB20_2 Depth=1
; VI-NEXT:    v_add_u32_e64 v0, s[4:5], 8, v0
; VI-NEXT:  .LBB20_2: ; %bb1
; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
; VI-NEXT:    s_cbranch_scc1 .LBB20_1
; VI-NEXT:  ; %bb.3: ; %bb2
; VI-NEXT:    ; in Loop: Header=BB20_2 Depth=1
; VI-NEXT:    ds_write_b32 v0, v1
; VI-NEXT:    s_mov_b64 vcc, vcc
; VI-NEXT:    s_cbranch_vccz .LBB20_1
; VI-NEXT:  ; %bb.4: ; %DummyReturnBlock
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
bb0:
  br label %bb1

bb1:                                              ; preds = %bb3, %bb0
  %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
  %tmp1 = add nsw i32 %tmp0, -1
  %tmp2 = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tmp1
  br i1 undef, label %bb2, label %bb3

bb2:                                              ; preds = %bb1
  store volatile i32 1, ptr addrspace(3) %tmp2, align 4
  br label %bb3

bb3:                                              ; preds = %bb2, %bb1
  %tmp4 = add nsw i32 %tmp0, 2
  br label %bb1
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: