1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
4 ; Make sure the branch targets are correct after lowering llvm.amdgcn.if
; Visible IR: branches on `%value != 0` to %if.true or %endif. A volatile i32
; load from an undef addrspace(1) pointer produces %val, and the phi %v merges
; %val (incoming from %if.true) with undef (incoming from %entry).
; The checks expect one exec-masked region: s_and_saveexec_b64 saves the mask
; into s[4:5], s_cbranch_execz skips to the %endif label, and there s[4:5] is
; OR-ed back into exec.
6 define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
7 ; CHECK-LABEL: divergent_if_swap_brtarget_order0:
8 ; CHECK: ; %bb.0: ; %entry
9 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
11 ; CHECK-NEXT: ; implicit-def: $vgpr0
12 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
13 ; CHECK-NEXT: s_cbranch_execz .LBB0_2
14 ; CHECK-NEXT: ; %bb.1: ; %if.true
15 ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
16 ; CHECK-NEXT: s_waitcnt vmcnt(0)
17 ; CHECK-NEXT: .LBB0_2: ; %endif
18 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
19 ; CHECK-NEXT: s_setpc_b64 s[30:31]
21 %c = icmp ne i32 %value, 0
22 br i1 %c, label %if.true, label %endif
25 %val = load volatile i32, ptr addrspace(1) undef
29 %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
; Same compare, branch, volatile load and phi as
; @divergent_if_swap_brtarget_order0, but here the phi %v appears textually
; before the volatile load that defines %val.
; The expected instruction sequence is identical to order0's apart from the
; .LBB1_* label numbering: s_and_saveexec_b64, s_cbranch_execz to the %endif
; label, then s_or_b64 to put the saved mask back into exec.
33 define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
34 ; CHECK-LABEL: divergent_if_swap_brtarget_order1:
35 ; CHECK: ; %bb.0: ; %entry
36 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
38 ; CHECK-NEXT: ; implicit-def: $vgpr0
39 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
40 ; CHECK-NEXT: s_cbranch_execz .LBB1_2
41 ; CHECK-NEXT: ; %bb.1: ; %if.true
42 ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
43 ; CHECK-NEXT: s_waitcnt vmcnt(0)
44 ; CHECK-NEXT: .LBB1_2: ; %endif
45 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
46 ; CHECK-NEXT: s_setpc_b64 s[30:31]
48 %c = icmp ne i32 %value, 0
49 br i1 %c, label %if.true, label %endif
52 %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
56 %val = load volatile i32, ptr addrspace(1) undef
60 ; Make sure an 'and' with 1 is inserted on the condition for llvm.amdgcn.if
; The branch condition %c is `trunc i32 %value to i1` rather than a compare
; result. The checks expect the input to be masked with 1 (v_and_b32_e32) and
; then compared against 0 (v_cmp_ne_u32_e32) before s_and_saveexec_b64.
; The phi %v merges the volatile load result %val with undef.
61 define i32 @divergent_if_nonboolean_condition0(i32 %value) {
62 ; CHECK-LABEL: divergent_if_nonboolean_condition0:
63 ; CHECK: ; %bb.0: ; %entry
64 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
66 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
67 ; CHECK-NEXT: ; implicit-def: $vgpr0
68 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
69 ; CHECK-NEXT: s_cbranch_execz .LBB2_2
70 ; CHECK-NEXT: ; %bb.1: ; %if.true
71 ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
72 ; CHECK-NEXT: s_waitcnt vmcnt(0)
73 ; CHECK-NEXT: .LBB2_2: ; %endif
74 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
75 ; CHECK-NEXT: s_setpc_b64 s[30:31]
77 %c = trunc i32 %value to i1
78 br i1 %c, label %if.true, label %endif
81 %val = load volatile i32, ptr addrspace(1) undef
85 %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
89 ; Make sure an 'and' with 1 is inserted on the condition for llvm.amdgcn.if
; Variant of @divergent_if_nonboolean_condition0 in which the i32 that gets
; truncated to i1 is first loaded from the %ptr argument instead of being
; passed in directly. The checks expect a global_load_dword and its
; s_waitcnt vmcnt(0), followed by the same v_and_b32_e32 with 1 and
; v_cmp_ne_u32_e32 against 0 ahead of s_and_saveexec_b64.
90 define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) {
91 ; CHECK-LABEL: divergent_if_nonboolean_condition1:
92 ; CHECK: ; %bb.0: ; %entry
93 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; CHECK-NEXT: global_load_dword v0, v[0:1], off
95 ; CHECK-NEXT: s_waitcnt vmcnt(0)
96 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
97 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
98 ; CHECK-NEXT: ; implicit-def: $vgpr0
99 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
100 ; CHECK-NEXT: s_cbranch_execz .LBB3_2
101 ; CHECK-NEXT: ; %bb.1: ; %if.true
102 ; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
103 ; CHECK-NEXT: s_waitcnt vmcnt(0)
104 ; CHECK-NEXT: .LBB3_2: ; %endif
105 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
106 ; CHECK-NEXT: s_setpc_b64 s[30:31]
108 %value = load i32, ptr addrspace(1) %ptr
109 %c = trunc i32 %value to i1
110 br i1 %c, label %if.true, label %endif
113 %val = load volatile i32, ptr addrspace(1) undef
117 %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
; External addrspace(4) constant globals that @constrained_if_register_class
; loads from: an i32 (@external_constant) and a pointer (@const.ptr).
121 @external_constant = external addrspace(4) constant i32, align 4
122 @const.ptr = external addrspace(4) constant ptr, align 4
124 ; Make sure this case compiles. G_ICMP was mis-mapped due to having
125 ; the result register class constrained by llvm.amdgcn.if lowering.
; Visible IR, in order:
;   - load an i32 from @external_constant and branch on `!= 0` to %bb12/%bb2;
;   - load a pointer from @const.ptr, load a float through it, and branch on
;     `(float < 1.0, ordered) | false` to %bb8/%bb7;
;   - phi %tmp9 takes 0 from %bb7 and -1 from %bb2, and `%tmp9 == 0` selects
;     %bb11/%bb12;
;   - store float 4.0 to an undef addrspace(5) pointer.
; The expected output branches with s_cbranch_scc0/scc1 and s_cbranch_vccnz;
; no exec-mask save/restore instructions appear in the checks.
126 define void @constrained_if_register_class() {
127 ; CHECK-LABEL: constrained_if_register_class:
128 ; CHECK: ; %bb.0: ; %bb
129 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; CHECK-NEXT: s_getpc_b64 s[4:5]
131 ; CHECK-NEXT: s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4
132 ; CHECK-NEXT: s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+12
133 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
134 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
135 ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
136 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
137 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0
138 ; CHECK-NEXT: s_cbranch_scc0 .LBB4_2
139 ; CHECK-NEXT: .LBB4_1: ; %bb12
140 ; CHECK-NEXT: s_setpc_b64 s[30:31]
141 ; CHECK-NEXT: .LBB4_2: ; %bb2
142 ; CHECK-NEXT: s_getpc_b64 s[4:5]
143 ; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
144 ; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
145 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
146 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
147 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
148 ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
149 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
150 ; CHECK-NEXT: global_load_dword v0, v0, s[4:5]
151 ; CHECK-NEXT: s_mov_b32 s4, -1
152 ; CHECK-NEXT: s_waitcnt vmcnt(0)
153 ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
154 ; CHECK-NEXT: s_cbranch_vccnz .LBB4_4
155 ; CHECK-NEXT: ; %bb.3: ; %bb7
156 ; CHECK-NEXT: s_mov_b32 s4, 0
157 ; CHECK-NEXT: .LBB4_4: ; %bb8
158 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0
159 ; CHECK-NEXT: s_cbranch_scc1 .LBB4_1
160 ; CHECK-NEXT: ; %bb.5: ; %bb11
161 ; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
162 ; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
163 ; CHECK-NEXT: s_waitcnt vmcnt(0)
164 ; CHECK-NEXT: s_setpc_b64 s[30:31]
166 %tmp = load i32, ptr addrspace(4) @external_constant
167 %tmp1 = icmp ne i32 %tmp, 0
168 br i1 %tmp1, label %bb12, label %bb2
171 %ptr = load ptr, ptr addrspace(4) @const.ptr
172 %tmp4 = load float, ptr %ptr, align 4
173 %tmp5 = fcmp olt float %tmp4, 1.0
174 %tmp6 = or i1 %tmp5, false
175 br i1 %tmp6, label %bb8, label %bb7
181 %tmp9 = phi i32 [ 0, %bb7 ], [ -1, %bb2 ]
182 %tmp10 = icmp eq i32 %tmp9, 0
183 br i1 %tmp10, label %bb11, label %bb12
186 store float 4.0, ptr addrspace(5) undef, align 4
; Kernel containing a loop with two exits to %bb9.
; Visible IR:
;   - %tmp = workitem.id.x - %arg;
;   - the induction phi %lsr.iv starts from undef (incoming from %bb) and is
;     incremented by 1 on each pass;
;   - `%lsr.iv.next < 0` (signed) selects %bb4 or the exit %bb9;
;   - after a volatile i32 load, `%tmp < %load` (signed) selects %bb1 or %bb9.
; The checks expect the loop header at .LBB5_3 (%bb1) and a %Flow block
; (.LBB5_2) that ORs a mask into s[0:1], clears those bits from exec with
; s_andn2_b64, and leaves via s_cbranch_execz to .LBB5_5 (%bb9).
193 define amdgpu_kernel void @break_loop(i32 %arg) {
194 ; CHECK-LABEL: break_loop:
195 ; CHECK: ; %bb.0: ; %bb
196 ; CHECK-NEXT: s_load_dword s2, s[6:7], 0x0
197 ; CHECK-NEXT: s_mov_b64 s[0:1], 0
198 ; CHECK-NEXT: ; implicit-def: $vgpr1
199 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
200 ; CHECK-NEXT: v_subrev_u32_e32 v0, s2, v0
201 ; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
202 ; CHECK-NEXT: s_branch .LBB5_3
203 ; CHECK-NEXT: .LBB5_1: ; %bb4
204 ; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
205 ; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
206 ; CHECK-NEXT: s_waitcnt vmcnt(0)
207 ; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
208 ; CHECK-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
209 ; CHECK-NEXT: s_and_b64 s[4:5], exec, vcc
210 ; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
211 ; CHECK-NEXT: .LBB5_2: ; %Flow
212 ; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
213 ; CHECK-NEXT: s_and_b64 s[4:5], exec, s[2:3]
214 ; CHECK-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
215 ; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
216 ; CHECK-NEXT: s_cbranch_execz .LBB5_5
217 ; CHECK-NEXT: .LBB5_3: ; %bb1
218 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
219 ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
220 ; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
221 ; CHECK-NEXT: s_and_b64 s[4:5], exec, -1
222 ; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1
223 ; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
224 ; CHECK-NEXT: s_cbranch_vccz .LBB5_1
225 ; CHECK-NEXT: ; %bb.4: ; in Loop: Header=BB5_3 Depth=1
226 ; CHECK-NEXT: ; implicit-def: $vgpr1
227 ; CHECK-NEXT: s_branch .LBB5_2
228 ; CHECK-NEXT: .LBB5_5: ; %bb9
229 ; CHECK-NEXT: s_endpgm
231 %id = call i32 @llvm.amdgcn.workitem.id.x()
232 %tmp = sub i32 %id, %arg
236 %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
237 %lsr.iv.next = add i32 %lsr.iv, 1
238 %cmp0 = icmp slt i32 %lsr.iv.next, 0
239 br i1 %cmp0, label %bb4, label %bb9
242 %load = load volatile i32, ptr addrspace(1) undef, align 4
243 %cmp1 = icmp slt i32 %tmp, %load
244 br i1 %cmp1, label %bb1, label %bb9
; Declaration of the intrinsic called by @break_loop to obtain %id.
250 declare i32 @llvm.amdgcn.workitem.id.x()