; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -structurizecfg-skip-uniform-regions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -structurizecfg-skip-uniform-regions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
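
; The branch condition is uniform, so the branch should be selected as a
; scalar s_cmp/s_cbranch_scc sequence with no exec-mask manipulation.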
define amdgpu_kernel void @uniform_if_scc(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s2, s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_eq_u32 s2, 0
; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_cbranch_scc1 .LBB0_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s2, 1
; SI-NEXT: .LBB0_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_scc:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_eq_u32 s2, 0
; VI-NEXT: s_mov_b32 s2, 0
; VI-NEXT: s_cbranch_scc1 .LBB0_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s2, 1
; VI-NEXT: .LBB0_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

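; A uniform floating-point compare still needs a VALU compare, but its result
; lives in an SGPR pair and is branched on with s_and_b64 vcc, exec plus
; s_cbranch_vccnz.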
define amdgpu_kernel void @uniform_if_vcc(float %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_vcc:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s3, s[0:1], 0x9
; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], s3, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccnz .LBB1_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s2, 1
; SI-NEXT: .LBB1_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_vcc:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s3, s[0:1], 0x24
; VI-NEXT: s_mov_b32 s2, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_eq_f32_e64 s[4:5], s3, 0
; VI-NEXT: s_and_b64 vcc, exec, s[4:5]
; VI-NEXT: s_cbranch_vccnz .LBB1_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s2, 1
; VI-NEXT: .LBB1_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

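; These two tests swap the branch targets. The compiler should simply invert
; the compare (eq becomes lg, oeq becomes neq) instead of restructuring the CFG.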
define amdgpu_kernel void @uniform_if_swap_br_targets_scc(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_swap_br_targets_scc:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s2, s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s2, 0
; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_cbranch_scc1 .LBB2_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s2, 1
; SI-NEXT: .LBB2_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_swap_br_targets_scc:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s2, 0
; VI-NEXT: s_mov_b32 s2, 0
; VI-NEXT: s_cbranch_scc1 .LBB2_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s2, 1
; VI-NEXT: .LBB2_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_if_swap_br_targets_vcc(float %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_swap_br_targets_vcc:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s3, s[0:1], 0x9
; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_neq_f32_e64 s[4:5], s3, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccnz .LBB3_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s2, 1
; SI-NEXT: .LBB3_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_swap_br_targets_vcc:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s3, s[0:1], 0x24
; VI-NEXT: s_mov_b32 s2, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_neq_f32_e64 s[4:5], s3, 0
; VI-NEXT: s_and_b64 vcc, exec, s[4:5]
; VI-NEXT: s_cbranch_vccnz .LBB3_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s2, 1
; VI-NEXT: .LBB3_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
define amdgpu_kernel void @uniform_if_move_valu(ptr addrspace(1) %out, float %a) {
; SI-LABEL: uniform_if_move_valu:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: v_mov_b32_e32 v0, 0x41200000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_add_f32_e32 v0, s2, v0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0
; SI-NEXT: s_cbranch_vccnz .LBB4_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: .LBB4_2: ; %endif
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_move_valu:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: v_mov_b32_e32 v0, 0x41200000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_f32_e32 v0, s2, v0
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0
; VI-NEXT: s_cbranch_vccnz .LBB4_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: .LBB4_2: ; %endif
; VI-NEXT: s_endpgm
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
define amdgpu_kernel void @uniform_if_move_valu_commute(ptr addrspace(1) %out, float %a) {
; SI-LABEL: uniform_if_move_valu_commute:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: v_mov_b32_e32 v0, 0x41200000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_add_f32_e32 v0, s2, v0
; SI-NEXT: v_cmp_gt_u32_e32 vcc, 6, v0
; SI-NEXT: s_cbranch_vccnz .LBB5_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: .LBB5_2: ; %endif
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_move_valu_commute:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: v_mov_b32_e32 v0, 0x41200000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_f32_e32 v0, s2, v0
; VI-NEXT: v_cmp_gt_u32_e32 vcc, 6, v0
; VI-NEXT: s_cbranch_vccnz .LBB5_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: .LBB5_2: ; %endif
; VI-NEXT: s_endpgm
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

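; Uniform if/else where both arms just store and return: each arm should
; terminate in its own s_endpgm rather than branching to a common exit.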
define amdgpu_kernel void @uniform_if_else_ret(ptr addrspace(1) nocapture %out, i32 %a) {
; SI-LABEL: uniform_if_else_ret:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s2, 0
; SI-NEXT: s_cbranch_scc0 .LBB6_2
; SI-NEXT: ; %bb.1: ; %if.else
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 2
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB6_2: ; %if.then
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_else_ret:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s2, 0
; VI-NEXT: s_cbranch_scc0 .LBB6_2
; VI-NEXT: ; %bb.1: ; %if.else
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 2
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB6_2: ; %if.then
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  store i32 1, ptr addrspace(1) %out
  br label %if.end

if.else: ; preds = %entry
  store i32 2, ptr addrspace(1) %out
  br label %if.end

if.end: ; preds = %if.else, %if.then
  ret void
}

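; With a store in the shared if.end block, both arms must reach the join:
; one arm falls through and the other needs an explicit s_branch.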
define amdgpu_kernel void @uniform_if_else(ptr addrspace(1) nocapture %out0, ptr addrspace(1) nocapture %out1, i32 %a) {
; SI-LABEL: uniform_if_else:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s4, 0
; SI-NEXT: s_cbranch_scc0 .LBB7_2
; SI-NEXT: ; %bb.1: ; %if.else
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, 2
; SI-NEXT: s_branch .LBB7_3
; SI-NEXT: .LBB7_2: ; %if.then
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: .LBB7_3: ; %if.end
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_mov_b32 s5, s3
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 3
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_else:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s4, s[0:1], 0x34
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s4, 0
; VI-NEXT: s_cbranch_scc0 .LBB7_2
; VI-NEXT: ; %bb.1: ; %if.else
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: v_mov_b32_e32 v0, 2
; VI-NEXT: s_branch .LBB7_3
; VI-NEXT: .LBB7_2: ; %if.then
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: .LBB7_3: ; %if.end
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: v_mov_b32_e32 v0, 3
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  store i32 1, ptr addrspace(1) %out0
  br label %if.end

if.else: ; preds = %entry
  store i32 2, ptr addrspace(1) %out0
  br label %if.end

if.end: ; preds = %if.else, %if.then
  store i32 3, ptr addrspace(1) %out1
  ret void
}

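; The compare has two users, the branch and the sext: scc feeds the branch
; while s_cselect materializes the mask that v_cndmask consumes in the IF block.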
define amdgpu_kernel void @icmp_2_users(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: icmp_2_users:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_gt_i32 s4, 0
; SI-NEXT: s_cselect_b64 s[2:3], -1, 0
; SI-NEXT: s_cmp_lt_i32 s4, 1
; SI-NEXT: s_cbranch_scc1 .LBB8_2
; SI-NEXT: ; %bb.1: ; %IF
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: .LBB8_2: ; %ENDIF
; SI-NEXT: s_endpgm
;
; VI-LABEL: icmp_2_users:
; VI: ; %bb.0: ; %main_body
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_gt_i32 s4, 0
; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
; VI-NEXT: s_cmp_lt_i32 s4, 1
; VI-NEXT: s_cbranch_scc1 .LBB8_2
; VI-NEXT: ; %bb.1: ; %IF
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: .LBB8_2: ; %ENDIF
; VI-NEXT: s_endpgm
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:
  store i32 %1, ptr addrspace(1) %out
  br label %ENDIF

ENDIF: ; preds = %IF, %main_body
  ret void
}

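; The compare result is defined in %bb2 but consumed in %bb7, so the
; materialized mask has to survive the intervening uniform branch.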
define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, ptr addrspace(1) %out) {
; SI-LABEL: icmp_users_different_blocks:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lt_i32 s2, 1
; SI-NEXT: s_cbranch_scc1 .LBB9_2
; SI-NEXT: ; %bb.1: ; %bb2
; SI-NEXT: s_cmp_gt_i32 s3, 0
; SI-NEXT: s_cselect_b64 s[2:3], -1, 0
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB9_3
; SI-NEXT: .LBB9_2: ; %bb9
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB9_3: ; %bb7
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: icmp_users_different_blocks:
; VI: ; %bb.0: ; %bb
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lt_i32 s2, 1
; VI-NEXT: s_cbranch_scc1 .LBB9_2
; VI-NEXT: ; %bb.1: ; %bb2
; VI-NEXT: s_cmp_gt_i32 s3, 0
; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB9_3
; VI-NEXT: .LBB9_2: ; %bb9
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB9_3: ; %bb7
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2: ; preds = %bb
  %cmp1 = icmp sgt i32 %cond1, 0
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7: ; preds = %bb2
  store i32 %tmp3, ptr addrspace(1) %out
  br label %bb9

bb9: ; preds = %bb8, %bb4
  ret void
}

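; A loop with a uniform trip count should use a scalar counter and an
; scc-based backedge branch.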
define amdgpu_kernel void @uniform_loop(ptr addrspace(1) %out, i32 %a) {
; SI-LABEL: uniform_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s0, s[0:1], 0xb
; SI-NEXT: .LBB10_1: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_add_i32 s0, s0, -1
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: s_cbranch_scc1 .LBB10_1
; SI-NEXT: ; %bb.2: ; %done
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_loop:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
; VI-NEXT: .LBB10_1: ; %loop
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_add_i32 s0, s0, -1
; VI-NEXT: s_cmp_lg_u32 s0, 0
; VI-NEXT: s_cbranch_scc1 .LBB10_1
; VI-NEXT: ; %bb.2: ; %done
; VI-NEXT: s_endpgm
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}

; Test uniform and divergent.
define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: uniform_inside_divergent:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_cbranch_execz .LBB11_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s4, 0
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_cbranch_scc0 .LBB11_3
; SI-NEXT: .LBB11_2: ; %endif
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB11_3: ; %if_uniform
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_inside_divergent:
; VI: ; %bb.0: ; %entry
; VI-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
; VI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; VI-NEXT: s_cbranch_execz .LBB11_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s4, 0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_cbranch_scc0 .LBB11_3
; VI-NEXT: .LBB11_2: ; %endif
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB11_3: ; %if_uniform
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

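; Inverse nesting of the previous test: a divergent if inside a uniform
; region, so the exec-mask save/restore happens under the scalar branch.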
define amdgpu_kernel void @divergent_inside_uniform(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: divergent_inside_uniform:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s2, 0
; SI-NEXT: s_cbranch_scc0 .LBB12_2
; SI-NEXT: .LBB12_1: ; %endif
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB12_2: ; %if
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_cbranch_execz .LBB12_1
; SI-NEXT: ; %bb.3: ; %if_uniform
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: divergent_inside_uniform:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s2, 0
; VI-NEXT: s_cbranch_scc0 .LBB12_2
; VI-NEXT: .LBB12_1: ; %endif
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB12_2: ; %if
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_cbranch_execz .LBB12_1
; VI-NEXT: ; %bb.3: ; %if_uniform
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, ptr addrspace(1) %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  ret void
}

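; A divergent if followed by an independent uniform if: exec must be restored
; with s_or_b64 before the scalar branch on the uniform condition.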
define amdgpu_kernel void @divergent_if_uniform_if(ptr addrspace(1) %out, i32 %cond) {
; SI-LABEL: divergent_if_uniform_if:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_cbranch_execz .LBB13_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: .LBB13_2: ; %endif
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_load_dword s0, s[0:1], 0xb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s0, 0
; SI-NEXT: s_cbranch_scc0 .LBB13_4
; SI-NEXT: ; %bb.3: ; %exit
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB13_4: ; %if_uniform
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 2
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: divergent_if_uniform_if:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; VI-NEXT: s_cbranch_execz .LBB13_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: .LBB13_2: ; %endif
; VI-NEXT: s_or_b64 exec, exec, s[2:3]
; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s0, 0
; VI-NEXT: s_cbranch_scc0 .LBB13_4
; VI-NEXT: ; %bb.3: ; %exit
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB13_4: ; %if_uniform
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, 2
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, ptr addrspace(1) %out
  br label %endif

endif:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, ptr addrspace(1) %out
  br label %exit

exit:
  ret void
}

; The conditions of the branches in the two blocks are uniform. MachineCSE
; replaces the 2nd condition with the inverse of the first, leaving an scc
; use in a different block than the one where it was defined.
define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, ptr addrspace(1) %out) {
; SI-LABEL: cse_uniform_condition_different_blocks:
; SI: ; %bb.0: ; %bb
; SI-NEXT: s_load_dword s2, s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lt_i32 s2, 1
; SI-NEXT: s_cbranch_scc1 .LBB14_2
; SI-NEXT: ; %bb.1: ; %bb2
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: .LBB14_2: ; %bb9
; SI-NEXT: s_endpgm
;
; VI-LABEL: cse_uniform_condition_different_blocks:
; VI: ; %bb.0: ; %bb
; VI-NEXT: s_load_dword s2, s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lt_i32 s2, 1
; VI-NEXT: s_cbranch_scc1 .LBB14_2
; VI-NEXT: ; %bb.1: ; %bb2
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: .LBB14_2: ; %bb9
; VI-NEXT: s_endpgm
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2: ; preds = %bb
  %tmp3 = load volatile i32, ptr addrspace(1) undef
  store volatile i32 0, ptr addrspace(1) undef
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7: ; preds = %bb2
  store i32 %tmp3, ptr addrspace(1) %out
  br label %bb9

bb9: ; preds = %bb8, %bb4
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_eq(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_eq:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u64_e64 s[4:5], s[0:1], 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccnz .LBB15_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB15_2: ; %done
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_mov_b32 s5, s3
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_eq:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_eq_u64 s[0:1], 0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_cbranch_scc1 .LBB15_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB15_2: ; %done
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_ne(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_ne:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_ne_u64_e64 s[4:5], s[0:1], 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccnz .LBB16_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB16_2: ; %done
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_mov_b32 s5, s3
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_ne:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u64 s[0:1], 0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_cbranch_scc1 .LBB16_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB16_2: ; %done
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

; Fall-through to the else
define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, ptr addrspace(1) %out) {
; SI-LABEL: uniform_if_scc_i64_sgt:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_gt_i64_e64 s[4:5], s[0:1], 0
; SI-NEXT: s_mov_b32 s0, 0
; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
; SI-NEXT: s_cbranch_vccnz .LBB17_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s0, 1
; SI-NEXT: .LBB17_2: ; %done
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s2
; SI-NEXT: s_mov_b32 s5, s3
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: uniform_if_scc_i64_sgt:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_gt_i64_e64 s[4:5], s[0:1], 0
; VI-NEXT: s_mov_b32 s0, 0
; VI-NEXT: s_and_b64 vcc, exec, s[4:5]
; VI-NEXT: s_cbranch_vccnz .LBB17_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s0, 1
; VI-NEXT: .LBB17_2: ; %done
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
entry:
  %cmp0 = icmp sgt i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

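; In the following two tests the i64 compare operand is loaded into VGPRs, so
; the compare initially selected for the SALU must be moved to the VALU
; (v_cmp on a 64-bit register pair) and the branch goes through vcc.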
define amdgpu_kernel void @move_to_valu_i64_eq(ptr addrspace(1) %out) {
; SI-LABEL: move_to_valu_i64_eq:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: ds_read_b64 v[0:1], v0
; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; SI-NEXT: s_cbranch_vccnz .LBB18_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s2, 1
; SI-NEXT: .LBB18_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: move_to_valu_i64_eq:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: ds_read_b64 v[0:1], v0
; VI-NEXT: s_mov_b32 s2, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
; VI-NEXT: s_cbranch_vccnz .LBB18_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s2, 1
; VI-NEXT: .LBB18_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
  %cond = load volatile i64, ptr addrspace(3) undef
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @move_to_valu_i64_ne(ptr addrspace(1) %out) {
; SI-LABEL: move_to_valu_i64_ne:
; SI: ; %bb.0:
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: ds_read_b64 v[0:1], v0
; SI-NEXT: s_mov_b32 s2, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; SI-NEXT: s_cbranch_vccnz .LBB19_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s2, 1
; SI-NEXT: .LBB19_2: ; %done
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: move_to_valu_i64_ne:
; VI: ; %bb.0:
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: ds_read_b64 v[0:1], v0
; VI-NEXT: s_mov_b32 s2, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; VI-NEXT: s_cbranch_vccnz .LBB19_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s2, 1
; VI-NEXT: .LBB19_2: ; %done
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
  %cond = load volatile i64, ptr addrspace(3) undef
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, ptr addrspace(1) %out
  ret void
}

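; The phi in %bb1 takes a value computed in VGPRs, so the pointer arithmetic
; feeding the ds_write address must also be moved to the VALU.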
define void @move_to_valu_vgpr_operand_phi(ptr addrspace(3) %out) {
; SI-LABEL: move_to_valu_vgpr_operand_phi:
; SI: ; %bb.0: ; %bb0
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_add_i32_e32 v0, vcc, 28, v0
; SI-NEXT: v_mov_b32_e32 v1, 1
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: s_branch .LBB20_2
; SI-NEXT: .LBB20_1: ; %bb3
; SI-NEXT: ; in Loop: Header=BB20_2 Depth=1
; SI-NEXT: v_add_i32_e64 v0, s[4:5], 8, v0
; SI-NEXT: .LBB20_2: ; %bb1
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_cbranch_scc1 .LBB20_1
; SI-NEXT: ; %bb.3: ; %bb2
; SI-NEXT: ; in Loop: Header=BB20_2 Depth=1
; SI-NEXT: ds_write_b32 v0, v1
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz .LBB20_1
; SI-NEXT: ; %bb.4: ; %DummyReturnBlock
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: move_to_valu_vgpr_operand_phi:
; VI: ; %bb.0: ; %bb0
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, 28, v0
; VI-NEXT: v_mov_b32_e32 v1, 1
; VI-NEXT: s_and_b64 vcc, exec, 0
; VI-NEXT: s_mov_b32 m0, -1
; VI-NEXT: s_branch .LBB20_2
; VI-NEXT: .LBB20_1: ; %bb3
; VI-NEXT: ; in Loop: Header=BB20_2 Depth=1
; VI-NEXT: v_add_u32_e64 v0, s[4:5], 8, v0
; VI-NEXT: .LBB20_2: ; %bb1
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: s_cbranch_scc1 .LBB20_1
; VI-NEXT: ; %bb.3: ; %bb2
; VI-NEXT: ; in Loop: Header=BB20_2 Depth=1
; VI-NEXT: ds_write_b32 v0, v1
; VI-NEXT: s_mov_b64 vcc, vcc
; VI-NEXT: s_cbranch_vccz .LBB20_1
; VI-NEXT: ; %bb.4: ; %DummyReturnBlock
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
bb0:
  br label %bb1

bb1: ; preds = %bb3, %bb0
  %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
  %tmp1 = add nsw i32 %tmp0, -1
  %tmp2 = getelementptr inbounds i32, ptr addrspace(3) %out, i32 %tmp1
  br i1 undef, label %bb2, label %bb3

bb2: ; preds = %bb1
  store volatile i32 1, ptr addrspace(3) %tmp2, align 4
  br label %bb3

bb3: ; preds = %bb2, %bb1
  %tmp4 = add nsw i32 %tmp0, 2
  br label %bb1
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: