1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s
3 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
4 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
6 ; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s
7 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
8 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
; Casts a private (addrspace 5) and a local (addrspace 3) kernel argument to
; flat pointers and performs one volatile store through each result.
; Expected output, by target: the GFX8 variants fetch two extra dwords with an
; s_load_dwordx2 (offset 0x40 for V3/V4, 0xc8 for V5) and use them as the high
; halves of the flat addresses, while the GFX9 variants take the high halves
; from src_private_base / src_shared_base. Every variant compares the source
; pointer against -1 and selects 0 as the flat address in that case.
10 define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
11 ; GFX8V3-LABEL: addrspacecast:
13 ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
14 ; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
15 ; GFX8V3-NEXT: v_mov_b32_e32 v2, 1
16 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
17 ; GFX8V3-NEXT: s_mov_b32 s4, s0
18 ; GFX8V3-NEXT: s_mov_b32 s5, s3
19 ; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1
20 ; GFX8V3-NEXT: s_cselect_b64 s[4:5], s[4:5], 0
21 ; GFX8V3-NEXT: s_mov_b32 s6, s1
22 ; GFX8V3-NEXT: s_mov_b32 s7, s2
23 ; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1
24 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s4
25 ; GFX8V3-NEXT: s_cselect_b64 s[0:1], s[6:7], 0
26 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s5
27 ; GFX8V3-NEXT: flat_store_dword v[0:1], v2
28 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
29 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
30 ; GFX8V3-NEXT: v_mov_b32_e32 v2, 2
31 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
32 ; GFX8V3-NEXT: flat_store_dword v[0:1], v2
33 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
34 ; GFX8V3-NEXT: s_endpgm
36 ; GFX8V4-LABEL: addrspacecast:
38 ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
39 ; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
40 ; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
41 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
42 ; GFX8V4-NEXT: s_mov_b32 s4, s0
43 ; GFX8V4-NEXT: s_mov_b32 s5, s3
44 ; GFX8V4-NEXT: s_cmp_lg_u32 s0, -1
45 ; GFX8V4-NEXT: s_cselect_b64 s[4:5], s[4:5], 0
46 ; GFX8V4-NEXT: s_mov_b32 s6, s1
47 ; GFX8V4-NEXT: s_mov_b32 s7, s2
48 ; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
49 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
50 ; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[6:7], 0
51 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
52 ; GFX8V4-NEXT: flat_store_dword v[0:1], v2
53 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
54 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
55 ; GFX8V4-NEXT: v_mov_b32_e32 v2, 2
56 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
57 ; GFX8V4-NEXT: flat_store_dword v[0:1], v2
58 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
59 ; GFX8V4-NEXT: s_endpgm
61 ; GFX8V5-LABEL: addrspacecast:
63 ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
64 ; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0xc8
65 ; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
66 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
67 ; GFX8V5-NEXT: s_mov_b32 s4, s0
68 ; GFX8V5-NEXT: s_mov_b32 s5, s2
69 ; GFX8V5-NEXT: s_cmp_lg_u32 s0, -1
70 ; GFX8V5-NEXT: s_cselect_b64 s[4:5], s[4:5], 0
71 ; GFX8V5-NEXT: s_mov_b32 s2, s1
72 ; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
73 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
74 ; GFX8V5-NEXT: s_cselect_b64 s[0:1], s[2:3], 0
75 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
76 ; GFX8V5-NEXT: flat_store_dword v[0:1], v2
77 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
78 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
79 ; GFX8V5-NEXT: v_mov_b32_e32 v2, 2
80 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
81 ; GFX8V5-NEXT: flat_store_dword v[0:1], v2
82 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
83 ; GFX8V5-NEXT: s_endpgm
85 ; GFX9V3-LABEL: addrspacecast:
87 ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
88 ; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
89 ; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base
90 ; GFX9V3-NEXT: v_mov_b32_e32 v2, 1
91 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
92 ; GFX9V3-NEXT: s_mov_b32 s2, s0
93 ; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1
94 ; GFX9V3-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
95 ; GFX9V3-NEXT: s_mov_b32 s4, s1
96 ; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1
97 ; GFX9V3-NEXT: v_mov_b32_e32 v0, s2
98 ; GFX9V3-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
99 ; GFX9V3-NEXT: v_mov_b32_e32 v1, s3
100 ; GFX9V3-NEXT: flat_store_dword v[0:1], v2
101 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
102 ; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
103 ; GFX9V3-NEXT: v_mov_b32_e32 v2, 2
104 ; GFX9V3-NEXT: v_mov_b32_e32 v1, s1
105 ; GFX9V3-NEXT: flat_store_dword v[0:1], v2
106 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
107 ; GFX9V3-NEXT: s_endpgm
109 ; GFX9V4-LABEL: addrspacecast:
111 ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
112 ; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
113 ; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
114 ; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
115 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
116 ; GFX9V4-NEXT: s_mov_b32 s2, s0
117 ; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
118 ; GFX9V4-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
119 ; GFX9V4-NEXT: s_mov_b32 s4, s1
120 ; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
121 ; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
122 ; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
123 ; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
124 ; GFX9V4-NEXT: flat_store_dword v[0:1], v2
125 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
126 ; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
127 ; GFX9V4-NEXT: v_mov_b32_e32 v2, 2
128 ; GFX9V4-NEXT: v_mov_b32_e32 v1, s1
129 ; GFX9V4-NEXT: flat_store_dword v[0:1], v2
130 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
131 ; GFX9V4-NEXT: s_endpgm
133 ; GFX9V5-LABEL: addrspacecast:
135 ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
136 ; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
137 ; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
138 ; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
139 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
140 ; GFX9V5-NEXT: s_mov_b32 s2, s0
141 ; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
142 ; GFX9V5-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
143 ; GFX9V5-NEXT: s_mov_b32 s4, s1
144 ; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
145 ; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
146 ; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
147 ; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
148 ; GFX9V5-NEXT: flat_store_dword v[0:1], v2
149 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
150 ; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
151 ; GFX9V5-NEXT: v_mov_b32_e32 v2, 2
152 ; GFX9V5-NEXT: v_mov_b32_e32 v1, s1
153 ; GFX9V5-NEXT: flat_store_dword v[0:1], v2
154 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
155 ; GFX9V5-NEXT: s_endpgm
; IR under test: two addrspacecasts to flat, each consumed by a volatile store
; (the constants 1 and 2 tell the two stores apart in the output above).
156 %flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr
157 %flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr
158 store volatile i32 1, ptr %flat.private
159 store volatile i32 2, ptr %flat.local
; Calls llvm.amdgcn.is.shared on the flat pointer argument, zero-extends the
; i1 result to i32 and stores it volatile to an undef addrspace(1) pointer.
; Expected output: every variant compares s1 (the upper dword of the 64-bit
; value loaded into s[0:1]) against a base value. On GFX8 that value comes
; from an s_load_dword (offset 0x40 for V3/V4, 0xcc for V5); on GFX9 it is the
; upper half of src_shared_base.
163 define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
164 ; GFX8V3-LABEL: llvm_amdgcn_is_shared:
166 ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
167 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
168 ; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40
169 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
170 ; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
171 ; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0
172 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
173 ; GFX8V3-NEXT: flat_store_dword v[0:1], v0
174 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
175 ; GFX8V3-NEXT: s_endpgm
177 ; GFX8V4-LABEL: llvm_amdgcn_is_shared:
179 ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
180 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
181 ; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x40
182 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
183 ; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
184 ; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0
185 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
186 ; GFX8V4-NEXT: flat_store_dword v[0:1], v0
187 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
188 ; GFX8V4-NEXT: s_endpgm
190 ; GFX8V5-LABEL: llvm_amdgcn_is_shared:
192 ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
193 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
194 ; GFX8V5-NEXT: s_load_dword s0, s[4:5], 0xcc
195 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
196 ; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
197 ; GFX8V5-NEXT: s_cselect_b32 s0, 1, 0
198 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
199 ; GFX8V5-NEXT: flat_store_dword v[0:1], v0
200 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
201 ; GFX8V5-NEXT: s_endpgm
203 ; GFX9V3-LABEL: llvm_amdgcn_is_shared:
205 ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
206 ; GFX9V3-NEXT: s_mov_b64 s[2:3], src_shared_base
207 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
208 ; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3
209 ; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0
210 ; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
211 ; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
212 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
213 ; GFX9V3-NEXT: s_endpgm
215 ; GFX9V4-LABEL: llvm_amdgcn_is_shared:
217 ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
218 ; GFX9V4-NEXT: s_mov_b64 s[2:3], src_shared_base
219 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
220 ; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
221 ; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
222 ; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
223 ; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
224 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
225 ; GFX9V4-NEXT: s_endpgm
227 ; GFX9V5-LABEL: llvm_amdgcn_is_shared:
229 ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
230 ; GFX9V5-NEXT: s_mov_b64 s[2:3], src_shared_base
231 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
232 ; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
233 ; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
234 ; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
235 ; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
236 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
237 ; GFX9V5-NEXT: s_endpgm
; IR under test: the store is volatile so the intrinsic result stays live; its
; destination is undef, which is why the expected stores address v[0:1]
; without any preceding move into those registers.
238 %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
239 %zext = zext i1 %is.shared to i32
240 store volatile i32 %zext, ptr addrspace(1) undef
; Calls llvm.amdgcn.is.private on the flat pointer argument, zero-extends the
; i1 result to i32 and stores it volatile to an undef addrspace(1) pointer.
; Expected output: every variant compares s1 (the upper dword of the 64-bit
; value loaded into s[0:1]) against a base value. On GFX8 that value comes
; from an s_load_dword (offset 0x44 for V3/V4, 0xc8 for V5); on GFX9 it is the
; upper half of src_private_base.
244 define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
245 ; GFX8V3-LABEL: llvm_amdgcn_is_private:
247 ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
248 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
249 ; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44
250 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
251 ; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
252 ; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0
253 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
254 ; GFX8V3-NEXT: flat_store_dword v[0:1], v0
255 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
256 ; GFX8V3-NEXT: s_endpgm
258 ; GFX8V4-LABEL: llvm_amdgcn_is_private:
260 ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
261 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
262 ; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x44
263 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
264 ; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
265 ; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0
266 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
267 ; GFX8V4-NEXT: flat_store_dword v[0:1], v0
268 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
269 ; GFX8V4-NEXT: s_endpgm
271 ; GFX8V5-LABEL: llvm_amdgcn_is_private:
273 ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
274 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
275 ; GFX8V5-NEXT: s_load_dword s0, s[4:5], 0xc8
276 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
277 ; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
278 ; GFX8V5-NEXT: s_cselect_b32 s0, 1, 0
279 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
280 ; GFX8V5-NEXT: flat_store_dword v[0:1], v0
281 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
282 ; GFX8V5-NEXT: s_endpgm
284 ; GFX9V3-LABEL: llvm_amdgcn_is_private:
286 ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
287 ; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
288 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
289 ; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3
290 ; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0
291 ; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
292 ; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
293 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
294 ; GFX9V3-NEXT: s_endpgm
296 ; GFX9V4-LABEL: llvm_amdgcn_is_private:
298 ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
299 ; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
300 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
301 ; GFX9V4-NEXT: s_cmp_eq_u32 s1, s3
302 ; GFX9V4-NEXT: s_cselect_b32 s0, 1, 0
303 ; GFX9V4-NEXT: v_mov_b32_e32 v0, s0
304 ; GFX9V4-NEXT: global_store_dword v[0:1], v0, off
305 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
306 ; GFX9V4-NEXT: s_endpgm
308 ; GFX9V5-LABEL: llvm_amdgcn_is_private:
310 ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
311 ; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
312 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
313 ; GFX9V5-NEXT: s_cmp_eq_u32 s1, s3
314 ; GFX9V5-NEXT: s_cselect_b32 s0, 1, 0
315 ; GFX9V5-NEXT: v_mov_b32_e32 v0, s0
316 ; GFX9V5-NEXT: global_store_dword v[0:1], v0, off
317 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
318 ; GFX9V5-NEXT: s_endpgm
; IR under test: the store is volatile so the intrinsic result stays live; its
; destination is undef, which is why the expected stores address v[0:1]
; without any preceding move into those registers.
319 %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
320 %zext = zext i1 %is.private to i32
321 store volatile i32 %zext, ptr addrspace(1) undef
; Calls llvm.trap. Every variant expects "s_trap 2"; what is expected before
; it differs:
;   - GFX8 V3/V4 and GFX9 V3 first copy s[4:5] into s[0:1];
;   - GFX8 V5 first loads s[0:1] from s[4:5] at offset 0xc8 and waits on it;
;   - GFX9 V4/V5 expect nothing but the trap instruction.
; NOTE(review): s[0:1] presumably carries the queue pointer for the trap
; handler in the variants that set it up - confirm against the AMDGPU trap
; handler ABI documentation.
325 define amdgpu_kernel void @llvm_trap() {
326 ; GFX8V3-LABEL: llvm_trap:
328 ; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5]
329 ; GFX8V3-NEXT: s_trap 2
331 ; GFX8V4-LABEL: llvm_trap:
333 ; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5]
334 ; GFX8V4-NEXT: s_trap 2
336 ; GFX8V5-LABEL: llvm_trap:
338 ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xc8
339 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
340 ; GFX8V5-NEXT: s_trap 2
342 ; GFX9V3-LABEL: llvm_trap:
344 ; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5]
345 ; GFX9V3-NEXT: s_trap 2
347 ; GFX9V4-LABEL: llvm_trap:
349 ; GFX9V4-NEXT: s_trap 2
351 ; GFX9V5-LABEL: llvm_trap:
353 ; GFX9V5-NEXT: s_trap 2
354 call void @llvm.trap()
; Calls llvm.debugtrap. All six target/code-object combinations expect the
; same "s_trap 3", with no register setup checked before it (contrast with the
; llvm.trap expectations, which vary by variant).
358 define amdgpu_kernel void @llvm_debugtrap() {
359 ; GFX8V3-LABEL: llvm_debugtrap:
361 ; GFX8V3-NEXT: s_trap 3
363 ; GFX8V4-LABEL: llvm_debugtrap:
365 ; GFX8V4-NEXT: s_trap 3
367 ; GFX8V5-LABEL: llvm_debugtrap:
369 ; GFX8V5-NEXT: s_trap 3
371 ; GFX9V3-LABEL: llvm_debugtrap:
373 ; GFX9V3-NEXT: s_trap 3
375 ; GFX9V4-LABEL: llvm_debugtrap:
377 ; GFX9V4-NEXT: s_trap 3
379 ; GFX9V5-LABEL: llvm_debugtrap:
381 ; GFX9V5-NEXT: s_trap 3
382 call void @llvm.debugtrap()
; Obtains the queue, implicitarg and dispatch pointers plus the dispatch id,
; performs a volatile i8 load through each of the three pointers, and stores
; the dispatch id volatile to the addrspace(1) argument %ptr.
; Expected output: the V3/V4 variants read the three pointers from s[6:7],
; s[8:9]+8 and s[4:5] and the dispatch id from s[10:11]; the V5 variants use
; s[6:7]+8 and s[4:5] for the last two loads and s[8:9] for the dispatch id.
; NOTE(review): in the V5 expectations (both GFX8 and GFX9) the first byte
; load addresses v[0:1] with no preceding move into those registers, unlike
; V3/V4. Presumably this is the queue-pointer load and the pointer is not
; available as an input under code object v5 - confirm this is intended.
386 define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
387 ; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
389 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s6
390 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s7
391 ; GFX8V3-NEXT: s_add_u32 s0, s8, 8
392 ; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
393 ; GFX8V3-NEXT: s_addc_u32 s1, s9, 0
394 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
395 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
396 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
397 ; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
398 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
399 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s4
400 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s5
401 ; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
402 ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
403 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
404 ; GFX8V3-NEXT: v_mov_b32_e32 v0, s10
405 ; GFX8V3-NEXT: v_mov_b32_e32 v1, s11
406 ; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
407 ; GFX8V3-NEXT: v_mov_b32_e32 v3, s1
408 ; GFX8V3-NEXT: v_mov_b32_e32 v2, s0
409 ; GFX8V3-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
410 ; GFX8V3-NEXT: s_waitcnt vmcnt(0)
411 ; GFX8V3-NEXT: s_endpgm
413 ; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
415 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
416 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s7
417 ; GFX8V4-NEXT: s_add_u32 s0, s8, 8
418 ; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
419 ; GFX8V4-NEXT: s_addc_u32 s1, s9, 0
420 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
421 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s0
422 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s1
423 ; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
424 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
425 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
426 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
427 ; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
428 ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
429 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
430 ; GFX8V4-NEXT: v_mov_b32_e32 v0, s10
431 ; GFX8V4-NEXT: v_mov_b32_e32 v1, s11
432 ; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
433 ; GFX8V4-NEXT: v_mov_b32_e32 v3, s1
434 ; GFX8V4-NEXT: v_mov_b32_e32 v2, s0
435 ; GFX8V4-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
436 ; GFX8V4-NEXT: s_waitcnt vmcnt(0)
437 ; GFX8V4-NEXT: s_endpgm
439 ; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
441 ; GFX8V5-NEXT: s_add_u32 s0, s6, 8
442 ; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
443 ; GFX8V5-NEXT: s_addc_u32 s1, s7, 0
444 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
445 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s0
446 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s1
447 ; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
448 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
449 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
450 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
451 ; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
452 ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
453 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
454 ; GFX8V5-NEXT: v_mov_b32_e32 v0, s8
455 ; GFX8V5-NEXT: v_mov_b32_e32 v1, s9
456 ; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
457 ; GFX8V5-NEXT: v_mov_b32_e32 v3, s1
458 ; GFX8V5-NEXT: v_mov_b32_e32 v2, s0
459 ; GFX8V5-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
460 ; GFX8V5-NEXT: s_waitcnt vmcnt(0)
461 ; GFX8V5-NEXT: s_endpgm
463 ; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
465 ; GFX9V3-NEXT: v_mov_b32_e32 v2, 0
466 ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc
467 ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
468 ; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc
469 ; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
470 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
471 ; GFX9V3-NEXT: v_mov_b32_e32 v0, s10
472 ; GFX9V3-NEXT: v_mov_b32_e32 v1, s11
473 ; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7
474 ; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5
475 ; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
476 ; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
477 ; GFX9V3-NEXT: s_waitcnt vmcnt(0)
478 ; GFX9V3-NEXT: s_endpgm
480 ; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
482 ; GFX9V4-NEXT: v_mov_b32_e32 v2, 0
483 ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[6:7] glc
484 ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
485 ; GFX9V4-NEXT: global_load_ubyte v0, v2, s[4:5] glc
486 ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
487 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
488 ; GFX9V4-NEXT: v_mov_b32_e32 v0, s10
489 ; GFX9V4-NEXT: v_mov_b32_e32 v1, s11
490 ; GFX9V4-NEXT: ; kill: killed $sgpr6_sgpr7
491 ; GFX9V4-NEXT: ; kill: killed $sgpr4_sgpr5
492 ; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
493 ; GFX9V4-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
494 ; GFX9V4-NEXT: s_waitcnt vmcnt(0)
495 ; GFX9V4-NEXT: s_endpgm
497 ; GFX9V5-LABEL: llvm_amdgcn_queue_ptr:
499 ; GFX9V5-NEXT: v_mov_b32_e32 v2, 0
500 ; GFX9V5-NEXT: global_load_ubyte v0, v[0:1], off glc
501 ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
502 ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[6:7] offset:8 glc
503 ; GFX9V5-NEXT: global_load_ubyte v0, v2, s[4:5] glc
504 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
505 ; GFX9V5-NEXT: v_mov_b32_e32 v0, s8
506 ; GFX9V5-NEXT: v_mov_b32_e32 v1, s9
507 ; GFX9V5-NEXT: ; kill: killed $sgpr4_sgpr5
508 ; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
509 ; GFX9V5-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
510 ; GFX9V5-NEXT: s_waitcnt vmcnt(0)
511 ; GFX9V5-NEXT: s_endpgm
; IR under test: the loaded bytes are never used; the loads are volatile so
; that each pointer intrinsic still produces a memory access in the output.
512 %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
513 %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
514 %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
515 %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
516 %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
517 %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
518 %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
519 store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
; Declarations of the intrinsics called by the kernels in this test.
523 declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
524 declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
525 declare i64 @llvm.amdgcn.dispatch.id()
526 declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
527 declare i1 @llvm.amdgcn.is.shared(ptr)
528 declare i1 @llvm.amdgcn.is.private(ptr)
529 declare void @llvm.trap()
530 declare void @llvm.debugtrap()
; Module flag selecting the AMDGPU code object version. The i32 operand of !0
; is a textual placeholder, not valid IR on its own: each RUN line pipes this
; file through sed to substitute 300, 400 or 500 before llc reads it.
532 !llvm.module.flags = !{!0}
533 !0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}