1 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI-NOHSA -check-prefix=FUNC %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=GCNX3-HSA -check-prefix=FUNC %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=GCNX3-NOHSA -check-prefix=FUNC %s
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
5 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
6 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
8 ; FUNC-LABEL: {{^}}global_load_i32:
9 ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
10 ; GCN-HSA: {{flat|global}}_load_dword
12 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; Kernel under test. Loads one i32 through the addrspace(1) pointer %in and
; stores the value unchanged through the addrspace(1) pointer %out.
13 define amdgpu_kernel void @global_load_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
15 %ld = load i32, ptr addrspace(1) %in
16 store i32 %ld, ptr addrspace(1) %out
20 ; FUNC-LABEL: {{^}}global_load_v2i32:
21 ; GCN-NOHSA: buffer_load_dwordx2
22 ; GCN-HSA: {{flat|global}}_load_dwordx2
; Kernel under test. Loads a <2 x i32> vector through %in and stores it
; unchanged through %out (both addrspace(1) pointers).
25 define amdgpu_kernel void @global_load_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
27 %ld = load <2 x i32>, ptr addrspace(1) %in
28 store <2 x i32> %ld, ptr addrspace(1) %out
32 ; FUNC-LABEL: {{^}}global_load_v3i32:
33 ; SI-NOHSA: buffer_load_dwordx4
34 ; GCNX3-NOHSA: buffer_load_dwordx3
35 ; GCNX3-HSA: {{flat|global}}_load_dwordx3
; Kernel under test. Loads a <3 x i32> vector through %in and stores it
; unchanged through %out. The directives above accept either a 3-dword or a
; 4-dword load depending on which run configuration is being checked.
38 define amdgpu_kernel void @global_load_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
40 %ld = load <3 x i32>, ptr addrspace(1) %in
41 store <3 x i32> %ld, ptr addrspace(1) %out
45 ; FUNC-LABEL: {{^}}global_load_v4i32:
46 ; GCN-NOHSA: buffer_load_dwordx4
47 ; GCN-HSA: {{flat|global}}_load_dwordx4
; Kernel under test. Loads a <4 x i32> vector through %in and stores it
; unchanged through %out (both addrspace(1) pointers).
50 define amdgpu_kernel void @global_load_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
52 %ld = load <4 x i32>, ptr addrspace(1) %in
53 store <4 x i32> %ld, ptr addrspace(1) %out
57 ; FUNC-LABEL: {{^}}global_load_v8i32:
58 ; GCN-NOHSA: buffer_load_dwordx4
59 ; GCN-NOHSA: buffer_load_dwordx4
60 ; GCN-HSA: {{flat|global}}_load_dwordx4
61 ; GCN-HSA: {{flat|global}}_load_dwordx4
; Kernel under test. Loads an <8 x i32> vector through %in and stores it
; unchanged through %out. The directives above expect two 4-dword loads.
65 define amdgpu_kernel void @global_load_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
67 %ld = load <8 x i32>, ptr addrspace(1) %in
68 store <8 x i32> %ld, ptr addrspace(1) %out
72 ; FUNC-LABEL: {{^}}global_load_v9i32:
73 ; GCN-NOHSA: buffer_load_dwordx4
74 ; GCN-NOHSA: buffer_load_dwordx4
75 ; GCN-NOHSA: buffer_load_dword
76 ; GCN-HSA: {{flat|global}}_load_dwordx4
77 ; GCN-HSA: {{flat|global}}_load_dwordx4
78 ; GCN-HSA: {{flat|global}}_load_dword
; Kernel under test. Loads a <9 x i32> vector through %in and stores it
; unchanged through %out. The directives above expect two 4-dword loads
; followed by a single-dword load for the ninth element.
79 define amdgpu_kernel void @global_load_v9i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
81 %ld = load <9 x i32>, ptr addrspace(1) %in
82 store <9 x i32> %ld, ptr addrspace(1) %out
86 ; FUNC-LABEL: {{^}}global_load_v10i32:
87 ; GCN-NOHSA: buffer_load_dwordx4
88 ; GCN-NOHSA: buffer_load_dwordx4
89 ; GCN-NOHSA: buffer_load_dwordx2
90 ; GCN-HSA: {{flat|global}}_load_dwordx4
91 ; GCN-HSA: {{flat|global}}_load_dwordx4
92 ; GCN-HSA: {{flat|global}}_load_dwordx2
; Kernel under test. Loads a <10 x i32> vector through %in and stores it
; unchanged through %out. The directives above expect two 4-dword loads
; followed by one 2-dword load.
93 define amdgpu_kernel void @global_load_v10i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
95 %ld = load <10 x i32>, ptr addrspace(1) %in
96 store <10 x i32> %ld, ptr addrspace(1) %out
100 ; FUNC-LABEL: {{^}}global_load_v11i32:
101 ; SI-NOHSA: buffer_load_dwordx4
102 ; SI-NOHSA: buffer_load_dwordx4
103 ; SI-NOHSA: buffer_load_dwordx4
104 ; GCNX3-NOHSA: buffer_load_dwordx4
105 ; GCNX3-NOHSA: buffer_load_dwordx4
106 ; GCNX3-NOHSA: buffer_load_dwordx3
107 ; GCN-HSA: {{flat|global}}_load_dwordx4
108 ; GCN-HSA: {{flat|global}}_load_dwordx4
109 ; GCN-HSA: {{flat|global}}_load_dwordx3
; Kernel under test. Loads an <11 x i32> vector through %in and stores it
; unchanged through %out. The directives above expect two 4-dword loads plus
; either a 3-dword or a third 4-dword load, depending on the run configuration.
110 define amdgpu_kernel void @global_load_v11i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
112 %ld = load <11 x i32>, ptr addrspace(1) %in
113 store <11 x i32> %ld, ptr addrspace(1) %out
118 ; FUNC-LABEL: {{^}}global_load_v12i32:
119 ; GCN-NOHSA: buffer_load_dwordx4
120 ; GCN-NOHSA: buffer_load_dwordx4
121 ; GCN-NOHSA: buffer_load_dwordx4
122 ; GCN-HSA: {{flat|global}}_load_dwordx4
123 ; GCN-HSA: {{flat|global}}_load_dwordx4
124 ; GCN-HSA: {{flat|global}}_load_dwordx4
; Kernel under test. Loads a <12 x i32> vector through %in and stores it
; unchanged through %out. The directives above expect three 4-dword loads.
125 define amdgpu_kernel void @global_load_v12i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
127 %ld = load <12 x i32>, ptr addrspace(1) %in
128 store <12 x i32> %ld, ptr addrspace(1) %out
132 ; FUNC-LABEL: {{^}}global_load_v16i32:
133 ; GCN-NOHSA: buffer_load_dwordx4
134 ; GCN-NOHSA: buffer_load_dwordx4
135 ; GCN-NOHSA: buffer_load_dwordx4
136 ; GCN-NOHSA: buffer_load_dwordx4
138 ; GCN-HSA: {{flat|global}}_load_dwordx4
139 ; GCN-HSA: {{flat|global}}_load_dwordx4
140 ; GCN-HSA: {{flat|global}}_load_dwordx4
141 ; GCN-HSA: {{flat|global}}_load_dwordx4
; Kernel under test. Loads a <16 x i32> vector through %in and stores it
; unchanged through %out. The directives above expect four 4-dword loads.
147 define amdgpu_kernel void @global_load_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
149 %ld = load <16 x i32>, ptr addrspace(1) %in
150 store <16 x i32> %ld, ptr addrspace(1) %out
154 ; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
155 ; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
156 ; GCN-HSA-DAG: {{flat|global}}_load_dword v[[LO:[0-9]+]],
157 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
159 ; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
160 ; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
162 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; Kernel under test. Loads one i32 through %in, zero-extends it to i64 and
; stores the 64-bit result through %out. The directives above expect the high
; half of the stored pair to be a register that was set to constant 0.
163 define amdgpu_kernel void @global_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
164 %ld = load i32, ptr addrspace(1) %in
165 %ext = zext i32 %ld to i64
166 store i64 %ext, ptr addrspace(1) %out
170 ; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
171 ; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
172 ; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
173 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
174 ; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
175 ; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
180 ; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; Kernel under test. Loads one i32 through %in, sign-extends it to i64 and
; stores the 64-bit result through %out. The directives above expect the high
; half to come from an arithmetic shift right by 31 of the loaded dword.
182 define amdgpu_kernel void @global_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
183 %ld = load i32, ptr addrspace(1) %in
184 %ext = sext i32 %ld to i64
185 store i64 %ext, ptr addrspace(1) %out
189 ; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
190 ; GCN-NOHSA: buffer_load_dword
191 ; GCN-NOHSA: buffer_store_dwordx2
193 ; GCN-HSA: {{flat|global}}_load_dword
194 ; GCN-HSA: {{flat|global}}_store_dwordx2
; Kernel under test. Single-element vector form of the zero-extending load.
; Loads <1 x i32> through %in, zero-extends to <1 x i64> and stores through %out.
195 define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
196 %ld = load <1 x i32>, ptr addrspace(1) %in
197 %ext = zext <1 x i32> %ld to <1 x i64>
198 store <1 x i64> %ext, ptr addrspace(1) %out
202 ; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
203 ; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
204 ; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
205 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
206 ; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
207 ; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
; Kernel under test. Single-element vector form of the sign-extending load.
; Loads <1 x i32> through %in, sign-extends to <1 x i64> and stores through %out.
208 define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
209 %ld = load <1 x i32>, ptr addrspace(1) %in
210 %ext = sext <1 x i32> %ld to <1 x i64>
211 store <1 x i64> %ext, ptr addrspace(1) %out
215 ; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
216 ; GCN-NOHSA: buffer_load_dwordx2
217 ; GCN-NOHSA: buffer_store_dwordx4
219 ; GCN-HSA: {{flat|global}}_load_dwordx2
220 ; GCN-HSA: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <2 x i32> through %in, zero-extends every lane to
; i64 and stores the <2 x i64> result through %out.
221 define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
222 %ld = load <2 x i32>, ptr addrspace(1) %in
223 %ext = zext <2 x i32> %ld to <2 x i64>
224 store <2 x i64> %ext, ptr addrspace(1) %out
228 ; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
229 ; GCN-NOHSA: buffer_load_dwordx2
230 ; GCN-HSA: {{flat|global}}_load_dwordx2
232 ; GCN-DAG: v_ashrrev_i32
233 ; GCN-DAG: v_ashrrev_i32
235 ; GCN-NOHSA-DAG: buffer_store_dwordx4
236 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <2 x i32> through %in, sign-extends every lane to
; i64 and stores the <2 x i64> result through %out. The directives above
; expect one arithmetic shift per lane.
237 define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
238 %ld = load <2 x i32>, ptr addrspace(1) %in
239 %ext = sext <2 x i32> %ld to <2 x i64>
240 store <2 x i64> %ext, ptr addrspace(1) %out
244 ; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
245 ; GCN-NOHSA: buffer_load_dwordx4
246 ; GCN-NOHSA: buffer_store_dwordx4
247 ; GCN-NOHSA: buffer_store_dwordx4
249 ; GCN-HSA: {{flat|global}}_load_dwordx4
250 ; GCN-HSA: {{flat|global}}_store_dwordx4
251 ; GCN-HSA: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <4 x i32> through %in, zero-extends every lane to
; i64 and stores the <4 x i64> result through %out. The directives above
; expect one 4-dword load and two 4-dword stores.
252 define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
253 %ld = load <4 x i32>, ptr addrspace(1) %in
254 %ext = zext <4 x i32> %ld to <4 x i64>
255 store <4 x i64> %ext, ptr addrspace(1) %out
259 ; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
260 ; GCN-NOHSA: buffer_load_dwordx4
261 ; GCN-HSA: {{flat|global}}_load_dwordx4
263 ; GCN-DAG: v_ashrrev_i32
264 ; GCN-DAG: v_ashrrev_i32
265 ; GCN-DAG: v_ashrrev_i32
266 ; GCN-DAG: v_ashrrev_i32
268 ; GCN-NOHSA-DAG: buffer_store_dwordx4
269 ; GCN-NOHSA-DAG: buffer_store_dwordx4
271 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
272 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <4 x i32> through %in, sign-extends every lane to
; i64 and stores the <4 x i64> result through %out. The directives above
; expect four arithmetic shifts and two 4-dword stores.
273 define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
274 %ld = load <4 x i32>, ptr addrspace(1) %in
275 %ext = sext <4 x i32> %ld to <4 x i64>
276 store <4 x i64> %ext, ptr addrspace(1) %out
280 ; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
281 ; GCN-NOHSA: buffer_load_dwordx4
282 ; GCN-NOHSA: buffer_load_dwordx4
284 ; GCN-HSA: {{flat|global}}_load_dwordx4
285 ; GCN-HSA: {{flat|global}}_load_dwordx4
287 ; GCN-NOHSA-DAG: buffer_store_dwordx4
288 ; GCN-NOHSA-DAG: buffer_store_dwordx4
289 ; GCN-NOHSA-DAG: buffer_store_dwordx4
290 ; GCN-NOHSA-DAG: buffer_store_dwordx4
292 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
293 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
294 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
295 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <8 x i32> through %in, zero-extends every lane to
; i64 and stores the <8 x i64> result through %out. The directives above
; expect two 4-dword loads and four 4-dword stores.
296 define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
297 %ld = load <8 x i32>, ptr addrspace(1) %in
298 %ext = zext <8 x i32> %ld to <8 x i64>
299 store <8 x i64> %ext, ptr addrspace(1) %out
303 ; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
304 ; GCN-NOHSA: buffer_load_dwordx4
305 ; GCN-NOHSA: buffer_load_dwordx4
307 ; GCN-HSA: {{flat|global}}_load_dwordx4
308 ; GCN-HSA: {{flat|global}}_load_dwordx4
310 ; GCN-DAG: v_ashrrev_i32
311 ; GCN-DAG: v_ashrrev_i32
312 ; GCN-DAG: v_ashrrev_i32
313 ; GCN-DAG: v_ashrrev_i32
314 ; GCN-DAG: v_ashrrev_i32
315 ; GCN-DAG: v_ashrrev_i32
316 ; GCN-DAG: v_ashrrev_i32
317 ; GCN-DAG: v_ashrrev_i32
319 ; GCN-NOHSA-DAG: buffer_store_dwordx4
320 ; GCN-NOHSA-DAG: buffer_store_dwordx4
321 ; GCN-NOHSA-DAG: buffer_store_dwordx4
322 ; GCN-NOHSA-DAG: buffer_store_dwordx4
324 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
325 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
326 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
327 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <8 x i32> through %in, sign-extends every lane to
; i64 and stores the <8 x i64> result through %out. The directives above
; expect eight arithmetic shifts and four 4-dword stores.
328 define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
329 %ld = load <8 x i32>, ptr addrspace(1) %in
330 %ext = sext <8 x i32> %ld to <8 x i64>
331 store <8 x i64> %ext, ptr addrspace(1) %out
335 ; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
336 ; GCN-NOHSA: buffer_load_dwordx4
337 ; GCN-NOHSA: buffer_load_dwordx4
338 ; GCN-NOHSA: buffer_load_dwordx4
339 ; GCN-NOHSA: buffer_load_dwordx4
341 ; GCN-HSA: {{flat|global}}_load_dwordx4
342 ; GCN-HSA: {{flat|global}}_load_dwordx4
343 ; GCN-HSA: {{flat|global}}_load_dwordx4
344 ; GCN-HSA: {{flat|global}}_load_dwordx4
347 ; GCN-DAG: v_ashrrev_i32
348 ; GCN-DAG: v_ashrrev_i32
349 ; GCN-DAG: v_ashrrev_i32
350 ; GCN-DAG: v_ashrrev_i32
351 ; GCN-NOHSA-DAG: buffer_store_dwordx4
352 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
354 ; GCN-DAG: v_ashrrev_i32
355 ; GCN-DAG: v_ashrrev_i32
356 ; GCN-DAG: v_ashrrev_i32
357 ; GCN-DAG: v_ashrrev_i32
358 ; GCN-NOHSA-DAG: buffer_store_dwordx4
359 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
361 ; GCN-DAG: v_ashrrev_i32
362 ; GCN-DAG: v_ashrrev_i32
363 ; GCN-DAG: v_ashrrev_i32
364 ; GCN-DAG: v_ashrrev_i32
365 ; GCN-NOHSA-DAG: buffer_store_dwordx4
366 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
368 ; GCN-DAG: v_ashrrev_i32
369 ; GCN-DAG: v_ashrrev_i32
370 ; GCN-DAG: v_ashrrev_i32
371 ; GCN-DAG: v_ashrrev_i32
372 ; GCN-NOHSA-DAG: buffer_store_dwordx4
373 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <16 x i32> through %in, sign-extends every lane to
; i64 and stores the <16 x i64> result through %out. The directives above
; expect four 4-dword loads and sixteen arithmetic shifts.
374 define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
375 %ld = load <16 x i32>, ptr addrspace(1) %in
376 %ext = sext <16 x i32> %ld to <16 x i64>
377 store <16 x i64> %ext, ptr addrspace(1) %out
381 ; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
382 ; GCN-NOHSA: buffer_load_dwordx4
383 ; GCN-NOHSA: buffer_load_dwordx4
384 ; GCN-NOHSA: buffer_load_dwordx4
385 ; GCN-NOHSA: buffer_load_dwordx4
387 ; GCN-HSA: {{flat|global}}_load_dwordx4
388 ; GCN-HSA: {{flat|global}}_load_dwordx4
389 ; GCN-HSA: {{flat|global}}_load_dwordx4
390 ; GCN-HSA: {{flat|global}}_load_dwordx4
392 ; GCN-NOHSA: buffer_store_dwordx4
393 ; GCN-NOHSA: buffer_store_dwordx4
394 ; GCN-NOHSA: buffer_store_dwordx4
395 ; GCN-NOHSA: buffer_store_dwordx4
396 ; GCN-NOHSA: buffer_store_dwordx4
397 ; GCN-NOHSA: buffer_store_dwordx4
398 ; GCN-NOHSA: buffer_store_dwordx4
399 ; GCN-NOHSA: buffer_store_dwordx4
401 ; GCN-HSA: {{flat|global}}_store_dwordx4
402 ; GCN-HSA: {{flat|global}}_store_dwordx4
403 ; GCN-HSA: {{flat|global}}_store_dwordx4
404 ; GCN-HSA: {{flat|global}}_store_dwordx4
405 ; GCN-HSA: {{flat|global}}_store_dwordx4
406 ; GCN-HSA: {{flat|global}}_store_dwordx4
407 ; GCN-HSA: {{flat|global}}_store_dwordx4
408 ; GCN-HSA: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <16 x i32> through %in, zero-extends every lane to
; i64 and stores the <16 x i64> result through %out. The directives above
; expect four 4-dword loads and eight 4-dword stores.
409 define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
410 %ld = load <16 x i32>, ptr addrspace(1) %in
411 %ext = zext <16 x i32> %ld to <16 x i64>
412 store <16 x i64> %ext, ptr addrspace(1) %out
416 ; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:
418 ; GCN-NOHSA: buffer_load_dwordx4
419 ; GCN-NOHSA: buffer_load_dwordx4
420 ; GCN-NOHSA: buffer_load_dwordx4
421 ; GCN-NOHSA: buffer_load_dwordx4
422 ; GCN-NOHSA: buffer_load_dwordx4
423 ; GCN-NOHSA: buffer_load_dwordx4
424 ; GCN-NOHSA: buffer_load_dwordx4
425 ; GCN-NOHSA-DAG: buffer_load_dwordx4
427 ; GCN-HSA: {{flat|global}}_load_dwordx4
428 ; GCN-HSA: {{flat|global}}_load_dwordx4
429 ; GCN-HSA: {{flat|global}}_load_dwordx4
430 ; GCN-HSA: {{flat|global}}_load_dwordx4
431 ; GCN-HSA: {{flat|global}}_load_dwordx4
432 ; GCN-HSA: {{flat|global}}_load_dwordx4
433 ; GCN-HSA: {{flat|global}}_load_dwordx4
434 ; GCN-HSA: {{flat|global}}_load_dwordx4
436 ; GCN-DAG: v_ashrrev_i32
437 ; GCN-DAG: v_ashrrev_i32
438 ; GCN-DAG: v_ashrrev_i32
439 ; GCN-DAG: v_ashrrev_i32
440 ; GCN-DAG: v_ashrrev_i32
441 ; GCN-DAG: v_ashrrev_i32
442 ; GCN-DAG: v_ashrrev_i32
443 ; GCN-DAG: v_ashrrev_i32
444 ; GCN-DAG: v_ashrrev_i32
445 ; GCN-DAG: v_ashrrev_i32
446 ; GCN-DAG: v_ashrrev_i32
447 ; GCN-DAG: v_ashrrev_i32
448 ; GCN-DAG: v_ashrrev_i32
449 ; GCN-DAG: v_ashrrev_i32
450 ; GCN-DAG: v_ashrrev_i32
451 ; GCN-DAG: v_ashrrev_i32
452 ; GCN-DAG: v_ashrrev_i32
453 ; GCN-DAG: v_ashrrev_i32
454 ; GCN-DAG: v_ashrrev_i32
455 ; GCN-DAG: v_ashrrev_i32
456 ; GCN-DAG: v_ashrrev_i32
457 ; GCN-DAG: v_ashrrev_i32
458 ; GCN-DAG: v_ashrrev_i32
459 ; GCN-DAG: v_ashrrev_i32
460 ; GCN-DAG: v_ashrrev_i32
461 ; GCN-DAG: v_ashrrev_i32
462 ; GCN-DAG: v_ashrrev_i32
463 ; GCN-DAG: v_ashrrev_i32
464 ; GCN-DAG: v_ashrrev_i32
465 ; GCN-DAG: v_ashrrev_i32
466 ; GCN-DAG: v_ashrrev_i32
467 ; GCN-DAG: v_ashrrev_i32
469 ; GCN-NOHSA: buffer_store_dwordx4
470 ; GCN-NOHSA: buffer_store_dwordx4
471 ; GCN-NOHSA: buffer_store_dwordx4
472 ; GCN-NOHSA: buffer_store_dwordx4
474 ; GCN-NOHSA: buffer_store_dwordx4
475 ; GCN-NOHSA: buffer_store_dwordx4
476 ; GCN-NOHSA: buffer_store_dwordx4
477 ; GCN-NOHSA: buffer_store_dwordx4
479 ; GCN-NOHSA: buffer_store_dwordx4
480 ; GCN-NOHSA: buffer_store_dwordx4
481 ; GCN-NOHSA: buffer_store_dwordx4
482 ; GCN-NOHSA: buffer_store_dwordx4
484 ; GCN-NOHSA: buffer_store_dwordx4
485 ; GCN-NOHSA: buffer_store_dwordx4
486 ; GCN-NOHSA: buffer_store_dwordx4
487 ; GCN-NOHSA: buffer_store_dwordx4
489 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
490 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
491 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
492 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
494 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
495 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
496 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
497 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
499 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
500 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
501 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
502 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
504 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
505 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
506 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
507 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <32 x i32> through %in, sign-extends every lane to
; i64 and stores the <32 x i64> result through %out. The directives above
; expect eight 4-dword loads, thirty-two arithmetic shifts and sixteen
; 4-dword stores.
509 define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
510 %ld = load <32 x i32>, ptr addrspace(1) %in
511 %ext = sext <32 x i32> %ld to <32 x i64>
512 store <32 x i64> %ext, ptr addrspace(1) %out
516 ; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
517 ; GCN-NOHSA: buffer_load_dwordx4
518 ; GCN-NOHSA: buffer_load_dwordx4
519 ; GCN-NOHSA: buffer_load_dwordx4
520 ; GCN-NOHSA: buffer_load_dwordx4
521 ; GCN-NOHSA: buffer_load_dwordx4
522 ; GCN-NOHSA: buffer_load_dwordx4
523 ; GCN-NOHSA: buffer_load_dwordx4
524 ; GCN-NOHSA: buffer_load_dwordx4
526 ; GCN-HSA: {{flat|global}}_load_dwordx4
527 ; GCN-HSA: {{flat|global}}_load_dwordx4
528 ; GCN-HSA: {{flat|global}}_load_dwordx4
529 ; GCN-HSA: {{flat|global}}_load_dwordx4
530 ; GCN-HSA: {{flat|global}}_load_dwordx4
531 ; GCN-HSA: {{flat|global}}_load_dwordx4
532 ; GCN-HSA: {{flat|global}}_load_dwordx4
533 ; GCN-HSA: {{flat|global}}_load_dwordx4
536 ; GCN-NOHSA-DAG: buffer_store_dwordx4
537 ; GCN-NOHSA-DAG: buffer_store_dwordx4
538 ; GCN-NOHSA-DAG: buffer_store_dwordx4
539 ; GCN-NOHSA-DAG: buffer_store_dwordx4
541 ; GCN-NOHSA-DAG: buffer_store_dwordx4
542 ; GCN-NOHSA-DAG: buffer_store_dwordx4
543 ; GCN-NOHSA-DAG: buffer_store_dwordx4
544 ; GCN-NOHSA-DAG: buffer_store_dwordx4
546 ; GCN-NOHSA-DAG: buffer_store_dwordx4
547 ; GCN-NOHSA-DAG: buffer_store_dwordx4
548 ; GCN-NOHSA-DAG: buffer_store_dwordx4
549 ; GCN-NOHSA-DAG: buffer_store_dwordx4
551 ; GCN-NOHSA-DAG: buffer_store_dwordx4
552 ; GCN-NOHSA-DAG: buffer_store_dwordx4
553 ; GCN-NOHSA-DAG: buffer_store_dwordx4
554 ; GCN-NOHSA-DAG: buffer_store_dwordx4
557 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
558 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
559 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
560 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
562 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
563 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
564 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
565 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
567 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
568 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
569 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
570 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
572 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
573 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
574 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
575 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel under test. Loads <32 x i32> through %in, zero-extends every lane to
; i64 and stores the <32 x i64> result through %out. The directives above
; expect eight 4-dword loads and sixteen 4-dword stores.
576 define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
577 %ld = load <32 x i32>, ptr addrspace(1) %in
578 %ext = zext <32 x i32> %ld to <32 x i64>
579 store <32 x i64> %ext, ptr addrspace(1) %out
583 ; FUNC-LABEL: {{^}}global_load_v32i32:
584 ; GCN-NOHSA: buffer_load_dwordx4
585 ; GCN-NOHSA: buffer_load_dwordx4
586 ; GCN-NOHSA: buffer_load_dwordx4
587 ; GCN-NOHSA: buffer_load_dwordx4
588 ; GCN-NOHSA: buffer_load_dwordx4
589 ; GCN-NOHSA: buffer_load_dwordx4
590 ; GCN-NOHSA: buffer_load_dwordx4
591 ; GCN-NOHSA: buffer_load_dwordx4
593 ; GCN-HSA: {{flat|global}}_load_dwordx4
594 ; GCN-HSA: {{flat|global}}_load_dwordx4
595 ; GCN-HSA: {{flat|global}}_load_dwordx4
596 ; GCN-HSA: {{flat|global}}_load_dwordx4
597 ; GCN-HSA: {{flat|global}}_load_dwordx4
598 ; GCN-HSA: {{flat|global}}_load_dwordx4
599 ; GCN-HSA: {{flat|global}}_load_dwordx4
600 ; GCN-HSA: {{flat|global}}_load_dwordx4
603 ; GCN-NOHSA-DAG: buffer_store_dwordx4
604 ; GCN-NOHSA-DAG: buffer_store_dwordx4
605 ; GCN-NOHSA-DAG: buffer_store_dwordx4
606 ; GCN-NOHSA-DAG: buffer_store_dwordx4
608 ; GCN-NOHSA-DAG: buffer_store_dwordx4
609 ; GCN-NOHSA-DAG: buffer_store_dwordx4
610 ; GCN-NOHSA-DAG: buffer_store_dwordx4
611 ; GCN-NOHSA-DAG: buffer_store_dwordx4
613 ; GCN-NOHSA-DAG: buffer_store_dwordx4
614 ; GCN-NOHSA-DAG: buffer_store_dwordx4
615 ; GCN-NOHSA-DAG: buffer_store_dwordx4
616 ; GCN-NOHSA-DAG: buffer_store_dwordx4
618 ; GCN-NOHSA-DAG: buffer_store_dwordx4
619 ; GCN-NOHSA-DAG: buffer_store_dwordx4
620 ; GCN-NOHSA-DAG: buffer_store_dwordx4
621 ; GCN-NOHSA-DAG: buffer_store_dwordx4
625 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
626 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
627 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
628 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
630 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
631 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
632 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
633 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
635 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
636 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
637 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
638 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
640 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
641 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
642 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
643 ; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel under test. Loads a <32 x i32> vector through %in and stores it
; unchanged through %out. The directives above expect eight 4-dword loads
; and sixteen 4-dword store directives per run configuration.
644 define amdgpu_kernel void @global_load_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
645 %ld = load <32 x i32>, ptr addrspace(1) %in
646 store <32 x i32> %ld, ptr addrspace(1) %out
; Attribute group #0, referenced by every kernel definition visible in this
; file. It carries only the nounwind function attribute.
650 attributes #0 = { nounwind }