; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -structurizecfg-skip-uniform-regions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -structurizecfg-skip-uniform-regions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

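; A uniform i32 condition should use a scalar compare and branch (s_cmp + s_cbranch_scc) with no exec mask manipulation.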
define amdgpu_kernel void @uniform_if_scc(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_eq_u32 s0, 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_cbranch_scc1 .LBB0_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB0_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_scc:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s0, 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB0_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB0_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

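; A uniform float condition is evaluated with a VALU compare, but the branch itself stays uniform (s_cbranch_vccnz).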
define amdgpu_kernel void @uniform_if_vcc(float %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_vcc:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s1, s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_f32_e64 s[2:3], s1, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[2:3]
; SI-NEXT:    s_cbranch_vccnz .LBB1_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB1_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_vcc:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s1, s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_eq_f32_e64 s[2:3], s1, 0
; VI-NEXT:    s_and_b64 vcc, exec, s[2:3]
; VI-NEXT:    s_cbranch_vccnz .LBB1_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB1_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

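; Same as uniform_if_scc with the branch targets swapped; the compare is inverted to s_cmp_lg_u32.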
define amdgpu_kernel void @uniform_if_swap_br_targets_scc(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_swap_br_targets_scc:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_cbranch_scc1 .LBB2_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB2_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_swap_br_targets_scc:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s0, 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB2_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB2_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

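; Same as uniform_if_vcc with the branch targets swapped; the compare is inverted to v_cmp_neq_f32.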
define amdgpu_kernel void @uniform_if_swap_br_targets_vcc(float %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_swap_br_targets_vcc:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s1, s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_neq_f32_e64 s[2:3], s1, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[2:3]
; SI-NEXT:    s_cbranch_vccnz .LBB3_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB3_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_swap_br_targets_vcc:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s1, s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_neq_f32_e64 s[2:3], s1, 0
; VI-NEXT:    s_and_b64 vcc, exec, s[2:3]
; VI-NEXT:    s_cbranch_vccnz .LBB3_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB3_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
define amdgpu_kernel void @uniform_if_move_valu(ptr addrspace(1) %out, float %a) {
; SI-LABEL: uniform_if_move_valu:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
; SI-NEXT:    v_mov_b32_e32 v0, 0x41200000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e32 v0, s0, v0
; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 5, v0
; SI-NEXT:    s_cbranch_vccnz .LBB4_2
; SI-NEXT:  ; %bb.1: ; %if
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:  .LBB4_2: ; %endif
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_move_valu:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[4:5], 0x2c
; VI-NEXT:    v_mov_b32_e32 v0, 0x41200000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e32 v0, s0, v0
; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 5, v0
; VI-NEXT:    s_cbranch_vccnz .LBB4_2
; VI-NEXT:  ; %bb.1: ; %if
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:  .LBB4_2: ; %endif
; VI-NEXT:    s_endpgm
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
define amdgpu_kernel void @uniform_if_move_valu_commute(ptr addrspace(1) %out, float %a) {
; SI-LABEL: uniform_if_move_valu_commute:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
; SI-NEXT:    v_mov_b32_e32 v0, 0x41200000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e32 v0, s0, v0
; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 6, v0
; SI-NEXT:    s_cbranch_vccnz .LBB5_2
; SI-NEXT:  ; %bb.1: ; %if
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:  .LBB5_2: ; %endif
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_move_valu_commute:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[4:5], 0x2c
; VI-NEXT:    v_mov_b32_e32 v0, 0x41200000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e32 v0, s0, v0
; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 6, v0
; VI-NEXT:    s_cbranch_vccnz .LBB5_2
; VI-NEXT:  ; %bb.1: ; %if
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:  .LBB5_2: ; %endif
; VI-NEXT:    s_endpgm
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

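; Uniform if/else where both sides end the kernel instead of rejoining.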
define amdgpu_kernel void @uniform_if_else_ret(ptr addrspace(1) nocapture %out, i32 %a) {
; SI-LABEL: uniform_if_else_ret:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s2, 0
; SI-NEXT:    s_cbranch_scc0 .LBB6_2
; SI-NEXT:  ; %bb.1: ; %if.else
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 2
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB6_2: ; %if.then
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_else_ret:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s2, 0
; VI-NEXT:    s_cbranch_scc0 .LBB6_2
; VI-NEXT:  ; %bb.1: ; %if.else
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 2
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB6_2: ; %if.then
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, ptr addrspace(1) %out
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, ptr addrspace(1) %out
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ret void
}

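; Uniform if/else that rejoins at if.end for a common store.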
define amdgpu_kernel void @uniform_if_else(ptr addrspace(1) nocapture %out0, ptr addrspace(1) nocapture %out1, i32 %a) {
; SI-LABEL: uniform_if_else:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s6, s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s6, 0
; SI-NEXT:    s_cbranch_scc0 .LBB7_2
; SI-NEXT:  ; %bb.1: ; %if.else
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, 2
; SI-NEXT:    s_branch .LBB7_3
; SI-NEXT:  .LBB7_2: ; %if.then
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:  .LBB7_3: ; %if.end
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    s_waitcnt expcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, 3
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_else:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s6, s[4:5], 0x34
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s6, 0
; VI-NEXT:    s_cbranch_scc0 .LBB7_2
; VI-NEXT:  ; %bb.1: ; %if.else
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_mov_b32_e32 v0, 2
; VI-NEXT:    s_branch .LBB7_3
; VI-NEXT:  .LBB7_2: ; %if.then
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:  .LBB7_3: ; %if.end
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    v_mov_b32_e32 v0, 3
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, ptr addrspace(1) %out0
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, ptr addrspace(1) %out0
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  store i32 3, ptr addrspace(1) %out1
  ret void
}

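; The compare has two users, the branch and a sext, so it produces both an s_cselect mask and the scc branch.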
define amdgpu_kernel void @icmp_2_users(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: icmp_2_users:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_gt_i32 s2, 0
; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; SI-NEXT:    s_cmp_lt_i32 s2, 1
; SI-NEXT:    s_cbranch_scc1 .LBB8_2
; SI-NEXT:  ; %bb.1: ; %IF
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[0:1]
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:  .LBB8_2: ; %ENDIF
; SI-NEXT:    s_endpgm
;
; VI-LABEL: icmp_2_users:
; VI:       ; %bb.0: ; %main_body
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_gt_i32 s2, 0
; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; VI-NEXT:    s_cmp_lt_i32 s2, 1
; VI-NEXT:    s_cbranch_scc1 .LBB8_2
; VI-NEXT:  ; %bb.1: ; %IF
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[0:1]
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:  .LBB8_2: ; %ENDIF
; VI-NEXT:    s_endpgm
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  store i32 %1, ptr addrspace(1) %out
  br label %ENDIF

ENDIF:                                            ; preds = %IF, %main_body
  ret void
}

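; The compare results are used in a different block than the one they are defined in.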
define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, ptr addrspace(1) %out) {
; SI-LABEL: icmp_users_different_blocks:
; SI:       ; %bb.0: ; %bb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lt_i32 s0, 1
; SI-NEXT:    s_cbranch_scc1 .LBB9_2
; SI-NEXT:  ; %bb.1: ; %bb2
; SI-NEXT:    s_cmp_gt_i32 s1, 0
; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[0:1]
; SI-NEXT:    s_cbranch_vccz .LBB9_3
; SI-NEXT:  .LBB9_2: ; %bb9
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB9_3: ; %bb7
; SI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: icmp_users_different_blocks:
; VI:       ; %bb.0: ; %bb
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lt_i32 s0, 1
; VI-NEXT:    s_cbranch_scc1 .LBB9_2
; VI-NEXT:  ; %bb.1: ; %bb2
; VI-NEXT:    s_cmp_gt_i32 s1, 0
; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
; VI-NEXT:    s_and_b64 vcc, exec, s[0:1]
; VI-NEXT:    s_cbranch_vccz .LBB9_3
; VI-NEXT:  .LBB9_2: ; %bb9
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB9_3: ; %bb7
; VI-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
; VI-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %cmp1 = icmp sgt i32 %cond1, 0
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, ptr addrspace(1) %out
  br label %bb9

bb9:                                              ; preds = %bb8, %bb4
  ret void
}

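; Simple uniform loop; the counter and exit test stay on the SALU.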
define amdgpu_kernel void @uniform_loop(ptr addrspace(1) %out, i32 %a) {
; SI-LABEL: uniform_loop:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
; SI-NEXT:  .LBB10_1: ; %loop
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_i32 s0, s0, -1
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_cbranch_scc1 .LBB10_1
; SI-NEXT:  ; %bb.2: ; %done
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_loop:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[4:5], 0x2c
; VI-NEXT:  .LBB10_1: ; %loop
; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_add_i32 s0, s0, -1
; VI-NEXT:    s_cmp_lg_u32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB10_1
; VI-NEXT:  ; %bb.2: ; %done
; VI-NEXT:    s_endpgm
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}

; Test a uniform branch nested inside a divergent one.
define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: uniform_inside_divergent:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT:    s_cbranch_execz .LBB11_2
; SI-NEXT:  ; %bb.1: ; %if
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s6, 0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_cbranch_scc0 .LBB11_3
; SI-NEXT:  .LBB11_2: ; %endif
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB11_3: ; %if_uniform
; SI-NEXT:    s_waitcnt expcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_inside_divergent:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
; VI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
; VI-NEXT:    s_cbranch_execz .LBB11_2
; VI-NEXT:  ; %bb.1: ; %if
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s6, 0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_cbranch_scc0 .LBB11_3
; VI-NEXT:  .LBB11_2: ; %endif
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB11_3: ; %if_uniform
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

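; Test a divergent branch nested inside a uniform one.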
define amdgpu_kernel void @divergent_inside_uniform(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: divergent_inside_uniform:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s0, s[4:5], 0xb
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s0, 0
; SI-NEXT:    s_cbranch_scc0 .LBB12_2
; SI-NEXT:  .LBB12_1: ; %endif
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB12_2: ; %if
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT:    s_cbranch_execz .LBB12_1
; SI-NEXT:  ; %bb.3: ; %if_uniform
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: divergent_inside_uniform:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s0, s[4:5], 0x2c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s0, 0
; VI-NEXT:    s_cbranch_scc0 .LBB12_2
; VI-NEXT:  .LBB12_1: ; %endif
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB12_2: ; %if
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v1, 0
; VI-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT:    s_cbranch_execz .LBB12_1
; VI-NEXT:  ; %bb.3: ; %if_uniform
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

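; A divergent if followed by a uniform if; exec is restored before the scalar compare and branch.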
define amdgpu_kernel void @divergent_if_uniform_if(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: divergent_if_uniform_if:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT:    s_cbranch_execz .LBB13_2
; SI-NEXT:  ; %bb.1: ; %if
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:  .LBB13_2: ; %endif
; SI-NEXT:    s_or_b64 exec, exec, s[6:7]
; SI-NEXT:    s_load_dword s2, s[4:5], 0xb
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lg_u32 s2, 0
; SI-NEXT:    s_cbranch_scc0 .LBB13_4
; SI-NEXT:  ; %bb.3: ; %exit
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB13_4: ; %if_uniform
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt expcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, 2
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: divergent_if_uniform_if:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
; VI-NEXT:    s_cbranch_execz .LBB13_2
; VI-NEXT:  ; %bb.1: ; %if
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:  .LBB13_2: ; %endif
; VI-NEXT:    s_or_b64 exec, exec, s[6:7]
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u32 s2, 0
; VI-NEXT:    s_cbranch_scc0 .LBB13_4
; VI-NEXT:  ; %bb.3: ; %exit
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB13_4: ; %if_uniform
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 2
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, ptr addrspace(1) %out
  br label %exit

exit:
  ret void
}

; The conditions of the branches in the two blocks are
; uniform. MachineCSE replaces the 2nd condition with the inverse of
; the first, leaving an scc use in a different block than it was
; defined.
define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: cse_uniform_condition_different_blocks:
; SI:       ; %bb.0: ; %bb
; SI-NEXT:    s_load_dword s0, s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_cmp_lt_i32 s0, 1
; SI-NEXT:    s_cbranch_scc1 .LBB14_2
; SI-NEXT:  ; %bb.1: ; %bb2
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:  .LBB14_2: ; %bb9
; SI-NEXT:    s_endpgm
;
; VI-LABEL: cse_uniform_condition_different_blocks:
; VI:       ; %bb.0: ; %bb
; VI-NEXT:    s_load_dword s0, s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lt_i32 s0, 1
; VI-NEXT:    s_cbranch_scc1 .LBB14_2
; VI-NEXT:  ; %bb.1: ; %bb2
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v1, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:  .LBB14_2: ; %bb9
; VI-NEXT:    s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %tmp3 = load volatile i32, ptr addrspace(1) undef
  store volatile i32 0, ptr addrspace(1) undef
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, ptr addrspace(1) %out
  br label %bb9

bb9:                                              ; preds = %bb8, %bb4
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_eq(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_eq:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[0:1], 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; SI-NEXT:    s_cbranch_vccnz .LBB15_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB15_2: ; %done
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_eq:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u64 s[0:1], 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB15_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB15_2: ; %done
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_ne(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_ne:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_ne_u64_e64 s[4:5], s[0:1], 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; SI-NEXT:    s_cbranch_vccnz .LBB16_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB16_2: ; %done
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_ne:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_lg_u64 s[0:1], 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_cbranch_scc1 .LBB16_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB16_2: ; %done
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_sgt:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_gt_i64_e64 s[4:5], s[0:1], 0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; SI-NEXT:    s_cbranch_vccnz .LBB17_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB17_2: ; %done
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_sgt:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_gt_i64_e64 s[4:5], s[0:1], 0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_and_b64 vcc, exec, s[4:5]
; VI-NEXT:    s_cbranch_vccnz .LBB17_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB17_2: ; %done
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
entry:
  %cmp0 = icmp sgt i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

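; The i64 condition is loaded from LDS, so the eq compare is divergent and is moved to the VALU (v_cmp_eq_u64).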
define amdgpu_kernel void @move_to_valu_i64_eq(ptr addrspace(1) %out) {
; SI-LABEL: move_to_valu_i64_eq:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    ds_read_b64 v[0:1], v0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; SI-NEXT:    s_cbranch_vccnz .LBB18_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB18_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: move_to_valu_i64_eq:
; VI:       ; %bb.0:
; VI-NEXT:    s_mov_b32 m0, -1
; VI-NEXT:    ds_read_b64 v[0:1], v0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; VI-NEXT:    s_cbranch_vccnz .LBB18_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB18_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %cond = load volatile i64, ptr addrspace(3) undef
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

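; Same as move_to_valu_i64_eq with an ne compare (v_cmp_ne_u64).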
define amdgpu_kernel void @move_to_valu_i64_ne(ptr addrspace(1) %out) {
; SI-LABEL: move_to_valu_i64_ne:
; SI:       ; %bb.0:
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    ds_read_b64 v[0:1], v0
; SI-NEXT:    s_mov_b32 s0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; SI-NEXT:    s_cbranch_vccnz .LBB19_2
; SI-NEXT:  ; %bb.1: ; %else
; SI-NEXT:    s_mov_b32 s0, 1
; SI-NEXT:  .LBB19_2: ; %done
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: move_to_valu_i64_ne:
; VI:       ; %bb.0:
; VI-NEXT:    s_mov_b32 m0, -1
; VI-NEXT:    ds_read_b64 v[0:1], v0
; VI-NEXT:    s_mov_b32 s0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; VI-NEXT:    s_cbranch_vccnz .LBB19_2
; VI-NEXT:  ; %bb.1: ; %else
; VI-NEXT:    s_mov_b32 s0, 1
; VI-NEXT:  .LBB19_2: ; %done
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %cond = load volatile i64, ptr addrspace(3) undef
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

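; The phi'd address is a VGPR, so the pointer increment must be done with VALU adds.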
define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
; SI-LABEL: move_to_valu_vgpr_operand_phi:
; SI:       ; %bb.0: ; %bb0
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_add_i32_e32 v0, vcc, 28, v0
; SI-NEXT:    v_mov_b32_e32 v1, 1
; SI-NEXT:    s_and_b64 vcc, exec, 0
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_branch .LBB20_2
; SI-NEXT:  .LBB20_1: ; %bb3
; SI-NEXT:    ; in Loop: Header=BB20_2 Depth=1
; SI-NEXT:    v_add_i32_e64 v0, s[4:5], 8, v0
; SI-NEXT:  .LBB20_2: ; %bb1
; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
; SI-NEXT:    s_cbranch_scc1 .LBB20_1
; SI-NEXT:  ; %bb.3: ; %bb2
; SI-NEXT:    ; in Loop: Header=BB20_2 Depth=1
; SI-NEXT:    ds_write_b32 v0, v1
; SI-NEXT:    s_mov_b64 vcc, vcc
; SI-NEXT:    s_cbranch_vccz .LBB20_1
; SI-NEXT:  ; %bb.4: ; %DummyReturnBlock
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: move_to_valu_vgpr_operand_phi:
; VI:       ; %bb.0: ; %bb0
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 28, v0
; VI-NEXT:    v_mov_b32_e32 v1, 1
; VI-NEXT:    s_and_b64 vcc, exec, 0
; VI-NEXT:    s_mov_b32 m0, -1
; VI-NEXT:    s_branch .LBB20_2
; VI-NEXT:  .LBB20_1: ; %bb3
; VI-NEXT:    ; in Loop: Header=BB20_2 Depth=1
; VI-NEXT:    v_add_u32_e64 v0, s[4:5], 8, v0
; VI-NEXT:  .LBB20_2: ; %bb1
; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
; VI-NEXT:    s_cbranch_scc1 .LBB20_1
; VI-NEXT:  ; %bb.3: ; %bb2
; VI-NEXT:    ; in Loop: Header=BB20_2 Depth=1
; VI-NEXT:    ds_write_b32 v0, v1
; VI-NEXT:    s_mov_b64 vcc, vcc
; VI-NEXT:    s_cbranch_vccz .LBB20_1
; VI-NEXT:  ; %bb.4: ; %DummyReturnBlock
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
bb0:
  br label %bb1

bb1:                                              ; preds = %bb3, %bb0
  %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
  %tmp1 = add nsw i32 %tmp0, -1
  %tmp2 = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tmp1
  br i1 undef, label %bb2, label %bb3

bb2:                                              ; preds = %bb1
  store volatile i32 1, ptr addrspace(3) %tmp2, align 4
  br label %bb3

bb3:                                              ; preds = %bb2, %bb1
  %tmp4 = add nsw i32 %tmp0, 2
  br label %bb1
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: