1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s
3 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
4 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
6 ; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s
7 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
8 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
; addrspacecast: casts a private (addrspace 5) and a local (addrspace 3)
; pointer argument to flat pointers and does one volatile store through each
; (the constants 1 and 2 respectively).
; Per the checks below, gfx803 output takes the value placed in the upper
; register of each 64-bit store address from an s_load_dwordx2 off s[4:5]
; (offset 0x40 for code object v3/v4, offset 0xc8 for v5), while gfx906 output
; reads src_private_base / src_shared_base and is the same for all versions.
; NOTE(review): s[4:5] is presumably the queue pointer for v3/v4 and the
; kernarg segment pointer for v5 - confirm against the AMDGPU ABI docs.
10 define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
11 ; GFX8V3-LABEL: addrspacecast:
13 ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
14 ; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
15 ; GFX8V3-NEXT: v_mov_b32_e32 v4, 1
16 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
17 ; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1
18 ; GFX8V3-NEXT: s_cselect_b32 s3, s3, 0
19 ; GFX8V3-NEXT: s_cselect_b32 s0, s0, 0
20 ; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1
21 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
22 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s3
23 ; GFX8V3-NEXT: s_cselect_b32 s0, s2, 0
24 ; GFX8V3-NEXT: s_cselect_b32 s1, s1, 0
25 ; GFX8V3-NEXT: v_mov_b32_e32 v2, s1
26 ; GFX8V3-NEXT: v_mov_b32_e32 v3, s0
27 ; GFX8V3-NEXT: flat_store_dword v[0:1], v4
28 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
29 ; GFX8V3-NEXT: v_mov_b32_e32 v0, 2
30 ; GFX8V3-NEXT: flat_store_dword v[2:3], v0
31 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
32 ; GFX8V3-NEXT: s_endpgm
34 ; GFX8V4-LABEL: addrspacecast:
36 ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
37 ; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
38 ; GFX8V4-NEXT: v_mov_b32_e32 v4, 1
39 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
40 ; GFX8V4-NEXT: s_cmp_lg_u32 s0, -1
41 ; GFX8V4-NEXT: s_cselect_b32 s3, s3, 0
42 ; GFX8V4-NEXT: s_cselect_b32 s0, s0, 0
43 ; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
44 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
45 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s3
46 ; GFX8V4-NEXT: s_cselect_b32 s0, s2, 0
47 ; GFX8V4-NEXT: s_cselect_b32 s1, s1, 0
48 ; GFX8V4-NEXT: v_mov_b32_e32 v2, s1
49 ; GFX8V4-NEXT: v_mov_b32_e32 v3, s0
50 ; GFX8V4-NEXT: flat_store_dword v[0:1], v4
51 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
52 ; GFX8V4-NEXT: v_mov_b32_e32 v0, 2
53 ; GFX8V4-NEXT: flat_store_dword v[2:3], v0
54 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
55 ; GFX8V4-NEXT: s_endpgm
57 ; GFX8V5-LABEL: addrspacecast:
59 ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
60 ; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0xc8
61 ; GFX8V5-NEXT: v_mov_b32_e32 v4, 1
62 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
63 ; GFX8V5-NEXT: s_cmp_lg_u32 s0, -1
64 ; GFX8V5-NEXT: s_cselect_b32 s2, s2, 0
65 ; GFX8V5-NEXT: s_cselect_b32 s0, s0, 0
66 ; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
67 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
68 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s2
69 ; GFX8V5-NEXT: s_cselect_b32 s0, s3, 0
70 ; GFX8V5-NEXT: s_cselect_b32 s1, s1, 0
71 ; GFX8V5-NEXT: v_mov_b32_e32 v2, s1
72 ; GFX8V5-NEXT: v_mov_b32_e32 v3, s0
73 ; GFX8V5-NEXT: flat_store_dword v[0:1], v4
74 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
75 ; GFX8V5-NEXT: v_mov_b32_e32 v0, 2
76 ; GFX8V5-NEXT: flat_store_dword v[2:3], v0
77 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
78 ; GFX8V5-NEXT: s_endpgm
80 ; GFX9V3-LABEL: addrspacecast:
82 ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
83 ; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
84 ; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base
85 ; GFX9V3-NEXT: v_mov_b32_e32 v4, 1
86 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
87 ; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1
88 ; GFX9V3-NEXT: s_cselect_b32 s2, s3, 0
89 ; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0
90 ; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1
91 ; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
92 ; GFX9V3-NEXT: v_mov_b32_e32 v1, s2
93 ; GFX9V3-NEXT: s_cselect_b32 s0, s5, 0
94 ; GFX9V3-NEXT: s_cselect_b32 s1, s1, 0
95 ; GFX9V3-NEXT: v_mov_b32_e32 v2, s1
96 ; GFX9V3-NEXT: v_mov_b32_e32 v3, s0
97 ; GFX9V3-NEXT: flat_store_dword v[0:1], v4
98 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
99 ; GFX9V3-NEXT: v_mov_b32_e32 v0, 2
100 ; GFX9V3-NEXT: flat_store_dword v[2:3], v0
101 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
102 ; GFX9V3-NEXT: s_endpgm
104 ; GFX9V4-LABEL: addrspacecast:
106 ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
107 ; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
108 ; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
109 ; GFX9V4-NEXT: v_mov_b32_e32 v4, 1
110 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
111 ; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
112 ; GFX9V4-NEXT: s_cselect_b32 s2, s3, 0
113 ; GFX9V4-NEXT: s_cselect_b32 s0, s0, 0
114 ; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
115 ; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
116 ; GFX9V4-NEXT: v_mov_b32_e32 v1, s2
117 ; GFX9V4-NEXT: s_cselect_b32 s0, s5, 0
118 ; GFX9V4-NEXT: s_cselect_b32 s1, s1, 0
119 ; GFX9V4-NEXT: v_mov_b32_e32 v2, s1
120 ; GFX9V4-NEXT: v_mov_b32_e32 v3, s0
121 ; GFX9V4-NEXT: flat_store_dword v[0:1], v4
122 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
123 ; GFX9V4-NEXT: v_mov_b32_e32 v0, 2
124 ; GFX9V4-NEXT: flat_store_dword v[2:3], v0
125 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
126 ; GFX9V4-NEXT: s_endpgm
128 ; GFX9V5-LABEL: addrspacecast:
130 ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
131 ; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
132 ; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
133 ; GFX9V5-NEXT: v_mov_b32_e32 v4, 1
134 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
135 ; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
136 ; GFX9V5-NEXT: s_cselect_b32 s2, s3, 0
137 ; GFX9V5-NEXT: s_cselect_b32 s0, s0, 0
138 ; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
139 ; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
140 ; GFX9V5-NEXT: v_mov_b32_e32 v1, s2
141 ; GFX9V5-NEXT: s_cselect_b32 s0, s5, 0
142 ; GFX9V5-NEXT: s_cselect_b32 s1, s1, 0
143 ; GFX9V5-NEXT: v_mov_b32_e32 v2, s1
144 ; GFX9V5-NEXT: v_mov_b32_e32 v3, s0
145 ; GFX9V5-NEXT: flat_store_dword v[0:1], v4
146 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
147 ; GFX9V5-NEXT: v_mov_b32_e32 v0, 2
148 ; GFX9V5-NEXT: flat_store_dword v[2:3], v0
149 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
150 ; GFX9V5-NEXT: s_endpgm
151 %flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr
152 %flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr
153 store volatile i32 1, ptr %flat.private
154 store volatile i32 2, ptr %flat.local
; llvm_amdgcn_is_shared: calls llvm.amdgcn.is.shared on the flat pointer
; argument, zero-extends the i1 result to i32 and stores it with a volatile
; store to an undef addrspace(1) pointer.
; Per the checks below, gfx803 output compares against a dword loaded from
; s[4:5] (offset 0x40 for code object v3/v4, offset 0xcc for v5); gfx906
; output compares against the high half of src_shared_base for every version.
158 define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
159 ; GFX8V3-LABEL: llvm_amdgcn_is_shared:
161 ; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40
162 ; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4
163 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
164 ; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
165 ; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0
166 ; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
167 ; GFX8V3-NEXT: flat_store_dword v[0:1], v0
168 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
169 ; GFX8V3-NEXT: s_endpgm
171 ; GFX8V4-LABEL: llvm_amdgcn_is_shared:
173 ; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x40
174 ; GFX8V4-NEXT: s_load_dword s1, s[6:7], 0x4
175 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
176 ; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
177 ; GFX8V4-NEXT: s_cselect_b64 s[0:1], -1, 0
178 ; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
179 ; GFX8V4-NEXT: flat_store_dword v[0:1], v0
180 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
181 ; GFX8V4-NEXT: s_endpgm
183 ; GFX8V5-LABEL: llvm_amdgcn_is_shared:
185 ; GFX8V5-NEXT: s_load_dword s0, s[4:5], 0xcc
186 ; GFX8V5-NEXT: s_load_dword s1, s[4:5], 0x4
187 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
188 ; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
189 ; GFX8V5-NEXT: s_cselect_b64 s[0:1], -1, 0
190 ; GFX8V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
191 ; GFX8V5-NEXT: flat_store_dword v[0:1], v0
192 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
193 ; GFX8V5-NEXT: s_endpgm
195 ; GFX9V3-LABEL: llvm_amdgcn_is_shared:
197 ; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4
198 ; GFX9V3-NEXT: s_mov_b64 s[0:1], src_shared_base
199 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
200 ; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1
201 ; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0
202 ; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
203 ; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
204 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
205 ; GFX9V3-NEXT: s_endpgm
207 ; GFX9V4-LABEL: llvm_amdgcn_is_shared:
209 ; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4
210 ; GFX9V4-NEXT: s_mov_b64 s[0:1], src_shared_base
211 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
212 ; GFX9V4-NEXT: s_cmp_eq_u32 s2, s1
213 ; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
214 ; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
215 ; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
216 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
217 ; GFX9V4-NEXT: s_endpgm
219 ; GFX9V5-LABEL: llvm_amdgcn_is_shared:
221 ; GFX9V5-NEXT: s_load_dword s2, s[4:5], 0x4
222 ; GFX9V5-NEXT: s_mov_b64 s[0:1], src_shared_base
223 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
224 ; GFX9V5-NEXT: s_cmp_eq_u32 s2, s1
225 ; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
226 ; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
227 ; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
228 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
229 ; GFX9V5-NEXT: s_endpgm
230 %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
231 %zext = zext i1 %is.shared to i32
232 store volatile i32 %zext, ptr addrspace(1) undef
; llvm_amdgcn_is_private: calls llvm.amdgcn.is.private on the flat pointer
; argument, zero-extends the i1 result to i32 and stores it with a volatile
; store to an undef addrspace(1) pointer.
; Per the checks below, gfx803 output compares against a dword loaded from
; s[4:5] (offset 0x44 for code object v3/v4, offset 0xc8 for v5); gfx906
; output compares against the high half of src_private_base for every version.
236 define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
237 ; GFX8V3-LABEL: llvm_amdgcn_is_private:
239 ; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44
240 ; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4
241 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
242 ; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
243 ; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0
244 ; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
245 ; GFX8V3-NEXT: flat_store_dword v[0:1], v0
246 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
247 ; GFX8V3-NEXT: s_endpgm
249 ; GFX8V4-LABEL: llvm_amdgcn_is_private:
251 ; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x44
252 ; GFX8V4-NEXT: s_load_dword s1, s[6:7], 0x4
253 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
254 ; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
255 ; GFX8V4-NEXT: s_cselect_b64 s[0:1], -1, 0
256 ; GFX8V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
257 ; GFX8V4-NEXT: flat_store_dword v[0:1], v0
258 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
259 ; GFX8V4-NEXT: s_endpgm
261 ; GFX8V5-LABEL: llvm_amdgcn_is_private:
263 ; GFX8V5-NEXT: s_load_dword s0, s[4:5], 0xc8
264 ; GFX8V5-NEXT: s_load_dword s1, s[4:5], 0x4
265 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
266 ; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
267 ; GFX8V5-NEXT: s_cselect_b64 s[0:1], -1, 0
268 ; GFX8V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
269 ; GFX8V5-NEXT: flat_store_dword v[0:1], v0
270 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
271 ; GFX8V5-NEXT: s_endpgm
273 ; GFX9V3-LABEL: llvm_amdgcn_is_private:
275 ; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4
276 ; GFX9V3-NEXT: s_mov_b64 s[0:1], src_private_base
277 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
278 ; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1
279 ; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0
280 ; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
281 ; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
282 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
283 ; GFX9V3-NEXT: s_endpgm
285 ; GFX9V4-LABEL: llvm_amdgcn_is_private:
287 ; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4
288 ; GFX9V4-NEXT: s_mov_b64 s[0:1], src_private_base
289 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
290 ; GFX9V4-NEXT: s_cmp_eq_u32 s2, s1
291 ; GFX9V4-NEXT: s_cselect_b64 s[0:1], -1, 0
292 ; GFX9V4-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
293 ; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
294 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
295 ; GFX9V4-NEXT: s_endpgm
297 ; GFX9V5-LABEL: llvm_amdgcn_is_private:
299 ; GFX9V5-NEXT: s_load_dword s2, s[4:5], 0x4
300 ; GFX9V5-NEXT: s_mov_b64 s[0:1], src_private_base
301 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
302 ; GFX9V5-NEXT: s_cmp_eq_u32 s2, s1
303 ; GFX9V5-NEXT: s_cselect_b64 s[0:1], -1, 0
304 ; GFX9V5-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
305 ; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
306 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
307 ; GFX9V5-NEXT: s_endpgm
308 %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
309 %zext = zext i1 %is.private to i32
310 store volatile i32 %zext, ptr addrspace(1) undef
; llvm_trap: a kernel whose only visible operation is a call to llvm.trap.
; Every configuration expects s_trap 2; what precedes it differs:
;   - gfx803 v3/v4 and gfx906 v3 copy s[4:5] into s[0:1];
;   - gfx803 v5 loads s[0:1] from s[4:5] at offset 0xc8;
;   - gfx906 v4/v5 expect no setup instruction before the trap.
; NOTE(review): s[0:1] is presumably the queue pointer handed to the trap
; handler - confirm against the AMDGPU trap handler ABI.
314 define amdgpu_kernel void @llvm_trap() {
315 ; GFX8V3-LABEL: llvm_trap:
317 ; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5]
318 ; GFX8V3-NEXT: s_trap 2
320 ; GFX8V4-LABEL: llvm_trap:
322 ; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5]
323 ; GFX8V4-NEXT: s_trap 2
325 ; GFX8V5-LABEL: llvm_trap:
327 ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xc8
328 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
329 ; GFX8V5-NEXT: s_trap 2
331 ; GFX9V3-LABEL: llvm_trap:
333 ; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5]
334 ; GFX9V3-NEXT: s_trap 2
336 ; GFX9V4-LABEL: llvm_trap:
338 ; GFX9V4-NEXT: s_trap 2
340 ; GFX9V5-LABEL: llvm_trap:
342 ; GFX9V5-NEXT: s_trap 2
343 call void @llvm.trap()
; llvm_debugtrap: a kernel whose only visible operation is a call to
; llvm.debugtrap. All six configurations (gfx803 and gfx906, code object
; v3/v4/v5) expect the same single checked instruction, s_trap 3, with no
; register setup before it.
347 define amdgpu_kernel void @llvm_debugtrap() {
348 ; GFX8V3-LABEL: llvm_debugtrap:
350 ; GFX8V3-NEXT: s_trap 3
352 ; GFX8V4-LABEL: llvm_debugtrap:
354 ; GFX8V4-NEXT: s_trap 3
356 ; GFX8V5-LABEL: llvm_debugtrap:
358 ; GFX8V5-NEXT: s_trap 3
360 ; GFX9V3-LABEL: llvm_debugtrap:
362 ; GFX9V3-NEXT: s_trap 3
364 ; GFX9V4-LABEL: llvm_debugtrap:
366 ; GFX9V4-NEXT: s_trap 3
368 ; GFX9V5-LABEL: llvm_debugtrap:
370 ; GFX9V5-NEXT: s_trap 3
371 call void @llvm.debugtrap()
; llvm_amdgcn_queue_ptr: does a volatile i8 load through each of the queue,
; implicitarg and dispatch pointers (in that order), then a volatile store of
; the dispatch id to %ptr.
; Per the checks below, code object v3/v4 output loads through s[6:7],
; s[8:9] + 8 and s[4:5] and takes the stored value from s[10:11]. For v5 the
; later registers shift down by two (s[6:7] + 8, s[4:5], value from s[8:9])
; and the first load has no checked address setup: gfx803 uses v[0:1] as-is,
; gfx906 uses s[0:1].
; NOTE(review): the v5 shape presumably means the queue pointer intrinsic no
; longer gets a preloaded SGPR pair under v5 - confirm against the backend.
375 define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
376 ; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
378 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s6
379 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s7
380 ; GFX8V3-NEXT: s_add_u32 s0, s8, 8
381 ; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
382 ; GFX8V3-NEXT: s_addc_u32 s1, s9, 0
383 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
384 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
385 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
386 ; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
387 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
388 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s4
389 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s5
390 ; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
391 ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
392 ; GFX8V3-NEXT: v_mov_b32_e32 v2, s10
393 ; GFX8V3-NEXT: v_mov_b32_e32 v3, s11
394 ; GFX8V3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
395 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
396 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
397 ; GFX8V3-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
398 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
399 ; GFX8V3-NEXT: s_endpgm
401 ; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
403 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
404 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s7
405 ; GFX8V4-NEXT: s_add_u32 s0, s8, 8
406 ; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
407 ; GFX8V4-NEXT: s_addc_u32 s1, s9, 0
408 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
409 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
410 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
411 ; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
412 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
413 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
414 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
415 ; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
416 ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
417 ; GFX8V4-NEXT: v_mov_b32_e32 v2, s10
418 ; GFX8V4-NEXT: v_mov_b32_e32 v3, s11
419 ; GFX8V4-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
420 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
421 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
422 ; GFX8V4-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
423 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
424 ; GFX8V4-NEXT: s_endpgm
426 ; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
428 ; GFX8V5-NEXT: s_add_u32 s0, s6, 8
429 ; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
430 ; GFX8V5-NEXT: s_addc_u32 s1, s7, 0
431 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
432 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
433 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
434 ; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
435 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
436 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
437 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
438 ; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
439 ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
440 ; GFX8V5-NEXT: v_mov_b32_e32 v2, s8
441 ; GFX8V5-NEXT: v_mov_b32_e32 v3, s9
442 ; GFX8V5-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
443 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
444 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
445 ; GFX8V5-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
446 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
447 ; GFX8V5-NEXT: s_endpgm
449 ; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
451 ; GFX9V3-NEXT: v_mov_b32_e32 v2, 0
452 ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc
453 ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
454 ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc
455 ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
456 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
457 ; GFX9V3-NEXT: v_mov_b32_e32 v0, s10
458 ; GFX9V3-NEXT: v_mov_b32_e32 v1, s11
459 ; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7
460 ; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5
461 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
462 ; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
463 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
464 ; GFX9V3-NEXT: s_endpgm
466 ; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
468 ; GFX9V4-NEXT: v_mov_b32_e32 v2, 0
469 ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[6:7] glc
470 ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
471 ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[4:5] glc
472 ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
473 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
474 ; GFX9V4-NEXT: v_mov_b32_e32 v0, s10
475 ; GFX9V4-NEXT: v_mov_b32_e32 v1, s11
476 ; GFX9V4-NEXT: ; kill: killed $sgpr6_sgpr7
477 ; GFX9V4-NEXT: ; kill: killed $sgpr4_sgpr5
478 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
479 ; GFX9V4-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
480 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
481 ; GFX9V4-NEXT: s_endpgm
483 ; GFX9V5-LABEL: llvm_amdgcn_queue_ptr:
485 ; GFX9V5-NEXT: v_mov_b32_e32 v2, 0
486 ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[0:1] glc
487 ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[6:7] offset:8 glc
488 ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[4:5] glc
489 ; GFX9V5-NEXT: ; kill: killed $sgpr0_sgpr1
490 ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
491 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
492 ; GFX9V5-NEXT: v_mov_b32_e32 v0, s8
493 ; GFX9V5-NEXT: v_mov_b32_e32 v1, s9
494 ; GFX9V5-NEXT: ; kill: killed $sgpr4_sgpr5
495 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
496 ; GFX9V5-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
497 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
498 ; GFX9V5-NEXT: s_endpgm
499 %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
500 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
501 %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
502 %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
503 %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
504 %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
505 %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
506 store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
; Declarations of the intrinsics called by the kernels in this file.
510 declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
511 declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
512 declare i64 @llvm.amdgcn.dispatch.id()
513 declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
514 declare i1 @llvm.amdgcn.is.shared(ptr)
515 declare i1 @llvm.amdgcn.is.private(ptr)
516 declare void @llvm.trap()
517 declare void @llvm.debugtrap()
; Module flag selecting the code object version. The i32 operand of !0 is a
; textual placeholder, not valid IR on its own: the sed commands in the run
; lines at the top of the file replace it with 300, 400 or 500 before the
; module is piped to llc.
519 !llvm.module.flags = !{!0}
520 !0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}