[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / load-global-i32.ll
blobc4d9b4b2bb5ebbb2dc42aff763c2ece499959396
; Checks instruction selection for i32 (scalar and vector) loads from
; addrspace(1), and for zero/sign extension of the loaded values to i64,
; under six llc configurations.
;
; FileCheck prefixes enabled by the run lines below:
;   FUNC         - every run
;   GCN          - every amdgcn run
;   GCN-NOHSA    - amdgcn runs without the amdhsa OS (default CPU, tonga)
;   SI-NOHSA     - default-CPU amdgcn run only
;   GCNX3-NOHSA  - tonga run only
;   GCN-HSA      - amdhsa runs (kaveri, gfx900, gfx908)
;   GCNX3-HSA    - kaveri run only
;   EG           - r600 / redwood run
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI-NOHSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=GCNX3-HSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=GCNX3-NOHSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s

; Scalar i32 load from addrspace(1), stored straight back out.
; Expected: one single-dword load (buffer, flat/global, or a 32-bit VTX read).
; FUNC-LABEL: {{^}}global_load_i32:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
; GCN-HSA: {{flat|global}}_load_dword

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @global_load_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load i32, ptr addrspace(1) %in
  store i32 %ld, ptr addrspace(1) %out
  ret void
}

; <2 x i32> load from addrspace(1), stored straight back out.
; Expected: one dwordx2 load (a 64-bit VTX read on the r600 run).
; FUNC-LABEL: {{^}}global_load_v2i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: {{flat|global}}_load_dwordx2

; EG: VTX_READ_64
define amdgpu_kernel void @global_load_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <2 x i32>, ptr addrspace(1) %in
  store <2 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; <3 x i32> load from addrspace(1), stored straight back out.
; Expected: a dwordx4 load on the default-CPU run, a dwordx3 load on the
; tonga and kaveri runs, and a 128-bit VTX read on the r600 run.
; FUNC-LABEL: {{^}}global_load_v3i32:
; SI-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx3
; GCNX3-HSA: {{flat|global}}_load_dwordx3

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v3i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <3 x i32>, ptr addrspace(1) %in
  store <3 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; <4 x i32> load from addrspace(1), stored straight back out.
; Expected: one dwordx4 load (a 128-bit VTX read on the r600 run).
; FUNC-LABEL: {{^}}global_load_v4i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <4 x i32>, ptr addrspace(1) %in
  store <4 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; <8 x i32> load from addrspace(1), stored straight back out.
; Expected: two dwordx4 loads (two 128-bit VTX reads on the r600 run).
; FUNC-LABEL: {{^}}global_load_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <8 x i32>, ptr addrspace(1) %in
  store <8 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; <9 x i32> load from addrspace(1), stored straight back out.
; Expected: two dwordx4 loads followed by a single-dword load for the tail.
; No r600 checks are present for this function.
; FUNC-LABEL: {{^}}global_load_v9i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dword
define amdgpu_kernel void @global_load_v9i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <9 x i32>, ptr addrspace(1) %in
  store <9 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; <10 x i32> load from addrspace(1), stored straight back out.
; Expected: two dwordx4 loads followed by a dwordx2 load for the tail.
; No r600 checks are present for this function.
; FUNC-LABEL: {{^}}global_load_v10i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx2
define amdgpu_kernel void @global_load_v10i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <10 x i32>, ptr addrspace(1) %in
  store <10 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; <11 x i32> load from addrspace(1), stored straight back out.
; Expected: three dwordx4 loads on the default-CPU run; two dwordx4 loads
; plus a dwordx3 tail on the tonga and amdhsa runs.
; No r600 checks are present for this function.
; FUNC-LABEL: {{^}}global_load_v11i32:
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; SI-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx3
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx3
define amdgpu_kernel void @global_load_v11i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <11 x i32>, ptr addrspace(1) %in
  store <11 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; <12 x i32> load from addrspace(1), stored straight back out.
; Expected: three dwordx4 loads.
; No r600 checks are present for this function.
; FUNC-LABEL: {{^}}global_load_v12i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
define amdgpu_kernel void @global_load_v12i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <12 x i32>, ptr addrspace(1) %in
  store <12 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; <16 x i32> load from addrspace(1), stored straight back out.
; Expected: four dwordx4 loads (four 128-bit VTX reads on the r600 run).
; FUNC-LABEL: {{^}}global_load_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v16i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
  %ld = load <16 x i32>, ptr addrspace(1) %in
  store <16 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; i32 load from addrspace(1) zero-extended to i64.
; Expected: the loaded dword is the low half, the high half is a register
; set to 0 with v_mov_b32, and the pair is written by one dwordx2 store.
; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
; GCN-HSA-DAG: {{flat|global}}_load_dword v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
define amdgpu_kernel void @global_zextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load i32, ptr addrspace(1) %in
  %ext = zext i32 %ld to i64
  store i64 %ext, ptr addrspace(1) %out
  ret void
}

; i32 load from addrspace(1) sign-extended to i64.
; Expected: the high half is the loaded dword arithmetically shifted right
; by 31, and low/high are written by one dwordx2 store. The r600 run expects
; an ASHR with the literal 31.
; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]

; EG: MEM_RAT
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.
; EG: 31
define amdgpu_kernel void @global_sextload_i32_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load i32, ptr addrspace(1) %in
  %ext = sext i32 %ld to i64
  store i64 %ext, ptr addrspace(1) %out
  ret void
}

; <1 x i32> load from addrspace(1) zero-extended to <1 x i64>.
; Expected: one single-dword load and one dwordx2 store.
; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword
; GCN-NOHSA: buffer_store_dwordx2

; GCN-HSA: {{flat|global}}_load_dword
; GCN-HSA: {{flat|global}}_store_dwordx2
define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <1 x i32>, ptr addrspace(1) %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <1 x i32> load from addrspace(1) sign-extended to <1 x i64>.
; Expected: the high half is the loaded dword arithmetically shifted right
; by 31, and low/high are written by one dwordx2 store.
; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <1 x i32>, ptr addrspace(1) %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <2 x i32> load from addrspace(1) zero-extended to <2 x i64>.
; Expected: one dwordx2 load and one dwordx4 store.
; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx2
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <2 x i32>, ptr addrspace(1) %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <2 x i32> load from addrspace(1) sign-extended to <2 x i64>.
; Expected: one dwordx2 load, two arithmetic right shifts (one per element)
; and one dwordx4 store; shifts and store may appear in any order.
; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: {{flat|global}}_load_dwordx2

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <2 x i32>, ptr addrspace(1) %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <4 x i32> load from addrspace(1) zero-extended to <4 x i64>.
; Expected: one dwordx4 load and two dwordx4 stores.
; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <4 x i32>, ptr addrspace(1) %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <4 x i32> load from addrspace(1) sign-extended to <4 x i64>.
; Expected: one dwordx4 load, four arithmetic right shifts (one per element)
; and two dwordx4 stores; shifts and stores may appear in any order.
; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <4 x i32>, ptr addrspace(1) %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <8 x i32> load from addrspace(1) zero-extended to <8 x i64>.
; Expected: two dwordx4 loads and four dwordx4 stores (stores in any order).
; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <8 x i32>, ptr addrspace(1) %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <8 x i32> load from addrspace(1) sign-extended to <8 x i64>.
; Expected: two dwordx4 loads, eight arithmetic right shifts (one per
; element) and four dwordx4 stores; shifts and stores may appear in any order.
; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <8 x i32>, ptr addrspace(1) %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <16 x i32> load from addrspace(1) sign-extended to <16 x i64>.
; Expected: four dwordx4 loads, then sixteen arithmetic right shifts and a
; dwordx4 store per group of four shifts; shifts and stores are unordered.
; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <16 x i32>, ptr addrspace(1) %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <16 x i32> load from addrspace(1) zero-extended to <16 x i64>.
; Expected: four dwordx4 loads followed by eight dwordx4 stores, in order.
; The label pattern ends in ':' like every other label check in this file,
; so it anchors on the complete symbol name.
; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <16 x i32>, ptr addrspace(1) %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <32 x i32> load from addrspace(1) sign-extended to <32 x i64>.
; Expected: eight dwordx4 loads, thirty-two arithmetic right shifts (one per
; element) and sixteen dwordx4 stores.
; On the non-HSA runs the last load uses a -DAG check, so it may be
; interleaved with the shifts that follow it.
; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:

; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA-DAG: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <32 x i32>, ptr addrspace(1) %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <32 x i32> load from addrspace(1) zero-extended to <32 x i64>.
; Expected: eight dwordx4 loads and sixteen dwordx4 stores (stores unordered).
; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <32 x i32>, ptr addrspace(1) %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, ptr addrspace(1) %out
  ret void
}

; <32 x i32> load from addrspace(1), stored straight back out.
; Expected: eight dwordx4 loads and eight dwordx4 stores are required at
; minimum (sixteen unordered store checks are listed), and no "accvgpr"
; text may appear between the non-HSA and HSA store checks.
; FUNC-LABEL: {{^}}global_load_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOT: accvgpr

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_load_v32i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %ld = load <32 x i32>, ptr addrspace(1) %in
  store <32 x i32> %ld, ptr addrspace(1) %out
  ret void
}

; Attribute group referenced as #0 by every kernel definition in this file.
attributes #0 = { nounwind }