1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
4 ; Make sure the branch targets are correct after lowering llvm.amdgcn.if
; An integer compare (%value != 0) selects between %if.true, which performs a
; volatile global load, and %endif, where a phi merges the loaded value with
; undef. The expected output guards the load with s_and_saveexec_b64 and
; branches to the %endif label (.LBB0_2) with s_cbranch_execz, restoring exec
; there with s_or_b64.
6 define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
7 ; CHECK-LABEL: divergent_if_swap_brtarget_order0:
8 ; CHECK: ; %bb.0: ; %entry
9 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
11 ; CHECK-NEXT: ; implicit-def: $vgpr0
12 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
13 ; CHECK-NEXT: s_cbranch_execz .LBB0_2
14 ; CHECK-NEXT: ; %bb.1: ; %if.true
15 ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
16 ; CHECK-NEXT: s_waitcnt vmcnt(0)
17 ; CHECK-NEXT: .LBB0_2: ; %endif
18 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
19 ; CHECK-NEXT: s_waitcnt vmcnt(0)
20 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Branch condition: true when %value is non-zero.
22 %c = icmp ne i32 %value, 0
23 br i1 %c, label %if.true, label %endif
; Volatile load through an undef addrspace(1) pointer; it appears as the
; global_load_dword in the %if.true block of the expected output.
26 %val = load volatile i32, ptr addrspace(1) undef
; Join value: %val when arriving from %if.true, undef when arriving from %entry.
30 %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
; Same compare, branch, load and phi as @divergent_if_swap_brtarget_order0,
; but the phi (join point) is written before the volatile load in the IR text.
; The expected assembly is the same sequence as for order0; only the local
; label number differs (.LBB1_2 instead of .LBB0_2).
34 define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
35 ; CHECK-LABEL: divergent_if_swap_brtarget_order1:
36 ; CHECK: ; %bb.0: ; %entry
37 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
39 ; CHECK-NEXT: ; implicit-def: $vgpr0
40 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
41 ; CHECK-NEXT: s_cbranch_execz .LBB1_2
42 ; CHECK-NEXT: ; %bb.1: ; %if.true
43 ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
44 ; CHECK-NEXT: s_waitcnt vmcnt(0)
45 ; CHECK-NEXT: .LBB1_2: ; %endif
46 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
47 ; CHECK-NEXT: s_waitcnt vmcnt(0)
48 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Branch condition: true when %value is non-zero.
50 %c = icmp ne i32 %value, 0
51 br i1 %c, label %if.true, label %endif
; Join value: %val when arriving from %if.true, undef when arriving from %entry.
; Note that %val is defined textually below this phi.
54 %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
; Volatile load through an undef addrspace(1) pointer.
58 %val = load volatile i32, ptr addrspace(1) undef
62 ; Make sure the condition is masked with "and 1" before llvm.amdgcn.if
; The branch condition is an i32 argument truncated to i1. The expected output
; applies v_and_b32 with 1 to the value before the v_cmp_ne_u32 against 0
; that produces the mask consumed by s_and_saveexec_b64.
63 define i32 @divergent_if_nonboolean_condition0(i32 %value) {
64 ; CHECK-LABEL: divergent_if_nonboolean_condition0:
65 ; CHECK: ; %bb.0: ; %entry
66 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
68 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
69 ; CHECK-NEXT: ; implicit-def: $vgpr0
70 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
71 ; CHECK-NEXT: s_cbranch_execz .LBB2_2
72 ; CHECK-NEXT: ; %bb.1: ; %if.true
73 ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
74 ; CHECK-NEXT: s_waitcnt vmcnt(0)
75 ; CHECK-NEXT: .LBB2_2: ; %endif
76 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
77 ; CHECK-NEXT: s_waitcnt vmcnt(0)
78 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Condition is the low bit of %value (i32 truncated to i1), not a compare result.
80 %c = trunc i32 %value to i1
81 br i1 %c, label %if.true, label %endif
; Volatile load through an undef addrspace(1) pointer.
84 %val = load volatile i32, ptr addrspace(1) undef
; Join value: %val when arriving from %if.true, undef when arriving from %entry.
88 %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
92 ; Make sure the condition is masked with "and 1" before llvm.amdgcn.if
; Variant of @divergent_if_nonboolean_condition0 in which the truncated value
; is loaded from %ptr instead of being passed as an argument. The expected
; output waits for the load, applies v_and_b32 with 1, and then compares
; against 0.
93 define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) {
94 ; CHECK-LABEL: divergent_if_nonboolean_condition1:
95 ; CHECK: ; %bb.0: ; %entry
96 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; CHECK-NEXT: global_load_dword v0, v[0:1], off
98 ; CHECK-NEXT: s_waitcnt vmcnt(0)
99 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
100 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
101 ; CHECK-NEXT: ; implicit-def: $vgpr0
102 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
103 ; CHECK-NEXT: s_cbranch_execz .LBB3_2
104 ; CHECK-NEXT: ; %bb.1: ; %if.true
105 ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
106 ; CHECK-NEXT: s_waitcnt vmcnt(0)
107 ; CHECK-NEXT: .LBB3_2: ; %endif
108 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
109 ; CHECK-NEXT: s_waitcnt vmcnt(0)
110 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Non-volatile load of the value whose low bit becomes the branch condition.
112 %value = load i32, ptr addrspace(1) %ptr
113 %c = trunc i32 %value to i1
114 br i1 %c, label %if.true, label %endif
; Volatile load through an undef addrspace(1) pointer.
117 %val = load volatile i32, ptr addrspace(1) undef
; Join value: %val when arriving from %if.true, undef when arriving from %entry.
121 %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
; External addrspace(4) constants read by @constrained_if_register_class.
; The expected output reaches both through GOT-relative (gotpcrel32) address
; computations.
125 @external_constant = external addrspace(4) constant i32, align 4
126 @const.ptr = external addrspace(4) constant ptr, align 4
128 ; Make sure this case compiles. G_ICMP was mis-mapped due to having
129 ; the result register class constrained by llvm.amdgcn.if lowering.
; Control flow in the IR below: the first compare (on the value loaded from
; @external_constant) branches to %bb12 or %bb2; %bb2 branches on a float
; compare to %bb8 or %bb7; the phi for %bb8 yields 0 from %bb7 and -1 from
; %bb2, and a final compare of that phi against 0 branches to %bb11 (the
; store) or %bb12.
130 define void @constrained_if_register_class() {
131 ; CHECK-LABEL: constrained_if_register_class:
132 ; CHECK: ; %bb.0: ; %bb
133 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; CHECK-NEXT: s_getpc_b64 s[4:5]
135 ; CHECK-NEXT: s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4
136 ; CHECK-NEXT: s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+12
137 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
138 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
139 ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
140 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
141 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0
142 ; CHECK-NEXT: s_cbranch_scc0 .LBB4_2
143 ; CHECK-NEXT: .LBB4_1: ; %bb12
144 ; CHECK-NEXT: s_setpc_b64 s[30:31]
145 ; CHECK-NEXT: .LBB4_2: ; %bb2
146 ; CHECK-NEXT: s_getpc_b64 s[4:5]
147 ; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
148 ; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
149 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
150 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
151 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
152 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
153 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
154 ; CHECK-NEXT: global_load_dword v0, v0, s[4:5]
155 ; CHECK-NEXT: s_mov_b32 s4, -1
156 ; CHECK-NEXT: s_waitcnt vmcnt(0)
157 ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
158 ; CHECK-NEXT: s_cbranch_vccnz .LBB4_4
159 ; CHECK-NEXT: ; %bb.3: ; %bb7
160 ; CHECK-NEXT: s_mov_b32 s4, 0
161 ; CHECK-NEXT: .LBB4_4: ; %bb8
162 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0
163 ; CHECK-NEXT: s_cbranch_scc1 .LBB4_1
164 ; CHECK-NEXT: ; %bb.5: ; %bb11
165 ; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
166 ; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
167 ; CHECK-NEXT: s_waitcnt vmcnt(0)
168 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; First condition: the i32 loaded from @external_constant is non-zero.
170 %tmp = load i32, ptr addrspace(4) @external_constant
171 %tmp1 = icmp ne i32 %tmp, 0
172 br i1 %tmp1, label %bb12, label %bb2
; Second condition: the float reached through the pointer stored in @const.ptr
; is ordered-less-than 1.0.
175 %ptr = load ptr, ptr addrspace(4) @const.ptr
176 %tmp4 = load float, ptr %ptr, align 4
177 %tmp5 = fcmp olt float %tmp4, 1.0
; "or" with false leaves the value of %tmp5 unchanged.
178 %tmp6 = or i1 %tmp5, false
179 br i1 %tmp6, label %bb8, label %bb7
; 0 when arriving from %bb7, -1 when arriving directly from %bb2.
185 %tmp9 = phi i32 [ 0, %bb7 ], [ -1, %bb2 ]
186 %tmp10 = icmp eq i32 %tmp9, 0
187 br i1 %tmp10, label %bb11, label %bb12
; Store of 4.0 through an undef addrspace(5) pointer; it appears as the
; buffer_store_dword in the %bb11 block of the expected output.
190 store float 4.0, ptr addrspace(5) undef, align 4
; Kernel containing a loop with two branches to %bb9: one in the header %bb1,
; taken when the incremented counter is not negative, and one in %bb4, taken
; when (%id - %arg) is not less than a volatile-loaded value. In the expected
; output the %Flow block ANDs the per-iteration mask in s[2:3] with exec, ORs
; it into s[0:1] (initialised to 0), clears those bits from exec with
; s_andn2_b64, and branches to %bb9 (.LBB5_4) with s_cbranch_execz.
197 define amdgpu_kernel void @break_loop(i32 %arg) {
198 ; CHECK-LABEL: break_loop:
199 ; CHECK: ; %bb.0: ; %bb
200 ; CHECK-NEXT: s_load_dword s2, s[4:5], 0x0
201 ; CHECK-NEXT: s_mov_b64 s[0:1], 0
202 ; CHECK-NEXT: ; implicit-def: $vgpr1
203 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
204 ; CHECK-NEXT: v_subrev_u32_e32 v0, s2, v0
205 ; CHECK-NEXT: s_branch .LBB5_2
206 ; CHECK-NEXT: .LBB5_1: ; %Flow
207 ; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
208 ; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3]
209 ; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
210 ; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
211 ; CHECK-NEXT: s_cbranch_execz .LBB5_4
212 ; CHECK-NEXT: .LBB5_2: ; %bb1
213 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
214 ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
215 ; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1
216 ; CHECK-NEXT: s_mov_b64 s[2:3], -1
217 ; CHECK-NEXT: s_cbranch_vccnz .LBB5_1
218 ; CHECK-NEXT: ; %bb.3: ; %bb4
219 ; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
220 ; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
221 ; CHECK-NEXT: s_waitcnt vmcnt(0)
222 ; CHECK-NEXT: v_cmp_ge_i32_e64 s[2:3], v0, v2
223 ; CHECK-NEXT: s_branch .LBB5_1
224 ; CHECK-NEXT: .LBB5_4: ; %bb9
225 ; CHECK-NEXT: s_endpgm
; %tmp = work-item id minus the kernel argument; compared against the loaded
; value inside the loop.
227 %id = call i32 @llvm.amdgcn.workitem.id.x()
228 %tmp = sub i32 %id, %arg
; Loop counter: undef on entry from %bb, otherwise the incremented value
; carried around from %bb4.
232 %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
233 %lsr.iv.next = add i32 %lsr.iv, 1
; Continue into %bb4 only while the incremented counter is negative.
234 %cmp0 = icmp slt i32 %lsr.iv.next, 0
235 br i1 %cmp0, label %bb4, label %bb9
; Volatile load through an undef addrspace(1) pointer, then branch back to the
; header %bb1 while %tmp is less than the loaded value.
238 %load = load volatile i32, ptr addrspace(1) undef, align 4
239 %cmp1 = icmp slt i32 %tmp, %load
240 br i1 %cmp1, label %bb1, label %bb9
; Declaration of the intrinsic called by @break_loop to obtain %id.
246 declare i32 @llvm.amdgcn.workitem.id.x()