1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
3 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SDAG %s
4 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
5 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s
6 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
7 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
8 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
; Per-lane case: the flat pointer is read with a volatile load from a global
; array indexed by the workitem id, and llvm.amdgcn.is.shared is applied to the
; loaded value. The i1 result is zero-extended and stored through an undef
; global pointer so that it stays live.
; The checks expect the second dword of the loaded pointer (v1) to be compared
; against a dword fetched with s_load_dword at offset 0x33 on tahiti/hawaii, and
; against the upper half of src_shared_base on gfx900 and newer.
; NOTE(review): the store address is undef; consider migrating to poison and
; regenerating the checks -- confirm the expected output does not change.
10 define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
44 ; SI-LABEL: is_local_vgpr:
46 ; SI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
47 ; SI-NEXT: s_load_dword s4, s[8:9], 0x33
48 ; SI-NEXT: s_mov_b32 s2, 0
49 ; SI-NEXT: s_mov_b32 s3, 0x100f000
50 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
51 ; SI-NEXT: v_mov_b32_e32 v1, 0
52 ; SI-NEXT: s_waitcnt lgkmcnt(0)
53 ; SI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 glc
54 ; SI-NEXT: s_waitcnt vmcnt(0)
55 ; SI-NEXT: s_mov_b32 s2, -1
56 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
57 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
58 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
61 ; CI-SDAG-LABEL: is_local_vgpr:
63 ; CI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
64 ; CI-SDAG-NEXT: s_load_dword s2, s[8:9], 0x33
65 ; CI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0
66 ; CI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
67 ; CI-SDAG-NEXT: v_mov_b32_e32 v1, s1
68 ; CI-SDAG-NEXT: v_add_i32_e32 v0, vcc, s0, v0
69 ; CI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
70 ; CI-SDAG-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
71 ; CI-SDAG-NEXT: s_waitcnt vmcnt(0)
72 ; CI-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1
73 ; CI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
74 ; CI-SDAG-NEXT: flat_store_dword v[0:1], v0
75 ; CI-SDAG-NEXT: s_endpgm
77 ; GFX9-LABEL: is_local_vgpr:
79 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
80 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
81 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
82 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc
83 ; GFX9-NEXT: s_waitcnt vmcnt(0)
84 ; GFX9-NEXT: s_mov_b64 s[0:1], src_shared_base
85 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s1, v1
86 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
87 ; GFX9-NEXT: global_store_dword v[0:1], v0, off
90 ; CI-GISEL-LABEL: is_local_vgpr:
92 ; CI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
93 ; CI-GISEL-NEXT: s_load_dword s2, s[8:9], 0x33
94 ; CI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0
95 ; CI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
96 ; CI-GISEL-NEXT: v_mov_b32_e32 v0, s0
97 ; CI-GISEL-NEXT: v_mov_b32_e32 v1, s1
98 ; CI-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
99 ; CI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
100 ; CI-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
101 ; CI-GISEL-NEXT: s_waitcnt vmcnt(0)
102 ; CI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s2, v1
103 ; CI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
104 ; CI-GISEL-NEXT: flat_store_dword v[0:1], v0
105 ; CI-GISEL-NEXT: s_endpgm
107 ; GFX10-LABEL: is_local_vgpr:
109 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
110 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
111 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
112 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
113 ; GFX10-NEXT: s_waitcnt vmcnt(0)
114 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
115 ; GFX10-NEXT: s_mov_b64 s[0:1], src_shared_base
116 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
117 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
118 ; GFX10-NEXT: global_store_dword v[0:1], v0, off
119 ; GFX10-NEXT: s_endpgm
121 ; GFX11-LABEL: is_local_vgpr:
123 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
124 ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
125 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
126 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
127 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
128 ; GFX11-NEXT: global_load_b64 v[0:1], v0, s[0:1] glc dlc
129 ; GFX11-NEXT: s_waitcnt vmcnt(0)
130 ; GFX11-NEXT: s_mov_b64 s[0:1], src_shared_base
131 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1
132 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
133 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off
134 ; GFX11-NEXT: s_endpgm
135 %id = call i32 @llvm.amdgcn.workitem.id.x()
136 %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
137 %ptr = load volatile ptr, ptr addrspace(1) %gep
138 %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
139 %ext = zext i1 %val to i32
140 store i32 %ext, ptr addrspace(1) undef
144 ; FIXME: (setcc (zero_extend (setcc)), 1) is not folded out, resulting in a
145 ; select and a vcc branch.
; Kernel-argument case: llvm.amdgcn.is.shared is applied directly to the flat
; pointer argument and its result drives a conditional branch; the %bb0 side
; performs a volatile store of 0 through an undef global pointer.
; The SelectionDAG checks expect s_cselect_b64 + s_andn2_b64 feeding
; s_cbranch_vccnz, while the GlobalISel checks expect s_cmp_lg_u32 followed
; directly by s_cbranch_scc1.
; NOTE(review): the store address is undef; consider migrating to poison and
; regenerating the checks -- confirm the expected output does not change.
146 define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
181 ; SI-LABEL: is_local_sgpr:
183 ; SI-NEXT: s_load_dword s0, s[8:9], 0x1
184 ; SI-NEXT: s_load_dword s1, s[8:9], 0x33
185 ; SI-NEXT: s_waitcnt lgkmcnt(0)
186 ; SI-NEXT: s_cmp_eq_u32 s0, s1
187 ; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
188 ; SI-NEXT: s_andn2_b64 vcc, exec, s[0:1]
189 ; SI-NEXT: s_cbranch_vccnz .LBB1_2
190 ; SI-NEXT: ; %bb.1: ; %bb0
191 ; SI-NEXT: s_mov_b32 s3, 0x100f000
192 ; SI-NEXT: s_mov_b32 s2, -1
193 ; SI-NEXT: v_mov_b32_e32 v0, 0
194 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
195 ; SI-NEXT: s_waitcnt vmcnt(0)
196 ; SI-NEXT: .LBB1_2: ; %bb1
199 ; CI-SDAG-LABEL: is_local_sgpr:
201 ; CI-SDAG-NEXT: s_load_dword s0, s[8:9], 0x1
202 ; CI-SDAG-NEXT: s_load_dword s1, s[8:9], 0x33
203 ; CI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
204 ; CI-SDAG-NEXT: s_cmp_eq_u32 s0, s1
205 ; CI-SDAG-NEXT: s_cselect_b64 s[0:1], -1, 0
206 ; CI-SDAG-NEXT: s_andn2_b64 vcc, exec, s[0:1]
207 ; CI-SDAG-NEXT: s_cbranch_vccnz .LBB1_2
208 ; CI-SDAG-NEXT: ; %bb.1: ; %bb0
209 ; CI-SDAG-NEXT: v_mov_b32_e32 v0, 0
210 ; CI-SDAG-NEXT: flat_store_dword v[0:1], v0
211 ; CI-SDAG-NEXT: s_waitcnt vmcnt(0)
212 ; CI-SDAG-NEXT: .LBB1_2: ; %bb1
213 ; CI-SDAG-NEXT: s_endpgm
215 ; GFX9-SDAG-LABEL: is_local_sgpr:
216 ; GFX9-SDAG: ; %bb.0:
217 ; GFX9-SDAG-NEXT: s_load_dword s2, s[8:9], 0x4
218 ; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], src_shared_base
219 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
220 ; GFX9-SDAG-NEXT: s_cmp_eq_u32 s2, s1
221 ; GFX9-SDAG-NEXT: s_cselect_b64 s[0:1], -1, 0
222 ; GFX9-SDAG-NEXT: s_andn2_b64 vcc, exec, s[0:1]
223 ; GFX9-SDAG-NEXT: s_cbranch_vccnz .LBB1_2
224 ; GFX9-SDAG-NEXT: ; %bb.1: ; %bb0
225 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
226 ; GFX9-SDAG-NEXT: global_store_dword v[0:1], v0, off
227 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
228 ; GFX9-SDAG-NEXT: .LBB1_2: ; %bb1
229 ; GFX9-SDAG-NEXT: s_endpgm
231 ; CI-GISEL-LABEL: is_local_sgpr:
233 ; CI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
234 ; CI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
235 ; CI-GISEL-NEXT: s_load_dword s0, s[8:9], 0x33
236 ; CI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
237 ; CI-GISEL-NEXT: s_cmp_lg_u32 s1, s0
238 ; CI-GISEL-NEXT: s_cbranch_scc1 .LBB1_2
239 ; CI-GISEL-NEXT: ; %bb.1: ; %bb0
240 ; CI-GISEL-NEXT: v_mov_b32_e32 v0, 0
241 ; CI-GISEL-NEXT: flat_store_dword v[0:1], v0
242 ; CI-GISEL-NEXT: s_waitcnt vmcnt(0)
243 ; CI-GISEL-NEXT: .LBB1_2: ; %bb1
244 ; CI-GISEL-NEXT: s_endpgm
246 ; GFX9-GISEL-LABEL: is_local_sgpr:
247 ; GFX9-GISEL: ; %bb.0:
248 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
249 ; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], src_shared_base
250 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
251 ; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, s3
252 ; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_2
253 ; GFX9-GISEL-NEXT: ; %bb.1: ; %bb0
254 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0
255 ; GFX9-GISEL-NEXT: global_store_dword v[0:1], v0, off
256 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
257 ; GFX9-GISEL-NEXT: .LBB1_2: ; %bb1
258 ; GFX9-GISEL-NEXT: s_endpgm
260 ; GFX10-LABEL: is_local_sgpr:
262 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
263 ; GFX10-NEXT: s_mov_b64 s[2:3], src_shared_base
264 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
265 ; GFX10-NEXT: s_cmp_lg_u32 s1, s3
266 ; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
267 ; GFX10-NEXT: ; %bb.1: ; %bb0
268 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
269 ; GFX10-NEXT: global_store_dword v[0:1], v0, off
270 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
271 ; GFX10-NEXT: .LBB1_2: ; %bb1
272 ; GFX10-NEXT: s_endpgm
274 ; GFX11-LABEL: is_local_sgpr:
276 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
277 ; GFX11-NEXT: s_mov_b64 s[2:3], src_shared_base
278 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
279 ; GFX11-NEXT: s_cmp_lg_u32 s1, s3
280 ; GFX11-NEXT: s_cbranch_scc1 .LBB1_2
281 ; GFX11-NEXT: ; %bb.1: ; %bb0
282 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
283 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
284 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
285 ; GFX11-NEXT: .LBB1_2: ; %bb1
286 ; GFX11-NEXT: s_endpgm
287 %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
288 br i1 %val, label %bb0, label %bb1
291 store volatile i32 0, ptr addrspace(1) undef
; Module-level flag that sets "amdhsa_code_object_version" to 500 for every
; kernel in this file. The leading i32 1 is the flag's merge-behavior field.
; NOTE(review): 500 presumably selects AMDHSA code object v5 -- confirm against
; the AMDGPU usage guide before changing it, since the checked kernarg register
; assignments may depend on it.
298 !llvm.module.flags = !{!0}
299 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
300 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
302 ; GFX10-GISEL: {{.*}}
303 ; GFX11-GISEL: {{.*}}