1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
4 ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefixes=EG,FUNC %s
; Test case: load a single i1 through the addrspace(3) pointer %in and store
; it through the addrspace(3) pointer %out.
; The GCN runs are checked for a v_and_b32_e32 with a literal 1 operand
; (presumably masking the loaded value to its low bit -- confirm); the EG run
; is checked for an LDS_UBYTE_READ_RET.
6 ; FUNC-LABEL: {{^}}local_load_i1:
11 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
14 ; EG: LDS_UBYTE_READ_RET
17 define amdgpu_kernel void @local_load_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
18 %load = load i1, i1 addrspace(3)* %in
19 store i1 %load, i1 addrspace(3)* %out
; Test case: load a <2 x i1> vector from addrspace(3) pointer %in and store it
; unchanged to addrspace(3) pointer %out. The SICIVI runs must contain an
; "s_mov_b32 m0" after the kernel label.
23 ; FUNC-LABEL: {{^}}local_load_v2i1:
24 ; SICIVI: s_mov_b32 m0
26 define amdgpu_kernel void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
27 %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
28 store <2 x i1> %load, <2 x i1> addrspace(3)* %out
; Test case: load a <3 x i1> vector (a non-power-of-two element count) from
; addrspace(3) pointer %in and store it unchanged to addrspace(3) pointer %out.
; The SICIVI runs must contain an "s_mov_b32 m0" after the kernel label.
32 ; FUNC-LABEL: {{^}}local_load_v3i1:
33 ; SICIVI: s_mov_b32 m0
35 define amdgpu_kernel void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
36 %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
37 store <3 x i1> %load, <3 x i1> addrspace(3)* %out
; Test case: load a <4 x i1> vector from addrspace(3) pointer %in and store it
; unchanged to addrspace(3) pointer %out. The SICIVI runs must contain an
; "s_mov_b32 m0" after the kernel label.
41 ; FUNC-LABEL: {{^}}local_load_v4i1:
42 ; SICIVI: s_mov_b32 m0
44 define amdgpu_kernel void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
45 %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
46 store <4 x i1> %load, <4 x i1> addrspace(3)* %out
; Test case: load an <8 x i1> vector from addrspace(3) pointer %in and store it
; unchanged to addrspace(3) pointer %out. The SICIVI runs must contain an
; "s_mov_b32 m0" after the kernel label.
50 ; FUNC-LABEL: {{^}}local_load_v8i1:
51 ; SICIVI: s_mov_b32 m0
53 define amdgpu_kernel void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
54 %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
55 store <8 x i1> %load, <8 x i1> addrspace(3)* %out
; Test case: load a <16 x i1> vector from addrspace(3) pointer %in and store it
; unchanged to addrspace(3) pointer %out. The SICIVI runs must contain an
; "s_mov_b32 m0" after the kernel label.
59 ; FUNC-LABEL: {{^}}local_load_v16i1:
60 ; SICIVI: s_mov_b32 m0
62 define amdgpu_kernel void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
63 %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
64 store <16 x i1> %load, <16 x i1> addrspace(3)* %out
; Test case: load a <32 x i1> vector from addrspace(3) pointer %in and store it
; unchanged to addrspace(3) pointer %out. The SICIVI runs must contain an
; "s_mov_b32 m0" after the kernel label.
68 ; FUNC-LABEL: {{^}}local_load_v32i1:
69 ; SICIVI: s_mov_b32 m0
71 define amdgpu_kernel void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
72 %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
73 store <32 x i1> %load, <32 x i1> addrspace(3)* %out
; Test case: load a <64 x i1> vector from addrspace(3) pointer %in and store it
; unchanged to addrspace(3) pointer %out. The SICIVI runs must contain an
; "s_mov_b32 m0" after the kernel label.
77 ; FUNC-LABEL: {{^}}local_load_v64i1:
78 ; SICIVI: s_mov_b32 m0
80 define amdgpu_kernel void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
81 %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
82 store <64 x i1> %load, <64 x i1> addrspace(3)* %out
; Test case: load a scalar i1 from addrspace(3) pointer %in, zero-extend it to
; i32, and store the i32 to addrspace(3) pointer %out. The SICIVI runs must
; contain an "s_mov_b32 m0" after the kernel label.
86 ; FUNC-LABEL: {{^}}local_zextload_i1_to_i32:
87 ; SICIVI: s_mov_b32 m0
92 define amdgpu_kernel void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
93 %a = load i1, i1 addrspace(3)* %in
94 %ext = zext i1 %a to i32
95 store i32 %ext, i32 addrspace(3)* %out
; Test case: load a scalar i1 from addrspace(3) pointer %in, sign-extend it to
; i32, and store the i32 to addrspace(3) pointer %out.
; Checks: SICIVI runs need an "s_mov_b32 m0"; GCN runs need a v_bfe_i32 whose
; last two operands are 0 and 1 and which ends the line (presumably a signed
; extract of bit 0 with width 1 -- confirm); the EG run needs an
; LDS_UBYTE_READ_RET.
99 ; FUNC-LABEL: {{^}}local_sextload_i1_to_i32:
100 ; SICIVI: s_mov_b32 m0
104 ; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
107 ; EG: LDS_UBYTE_READ_RET
109 define amdgpu_kernel void @local_sextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
110 %a = load i1, i1 addrspace(3)* %in
111 %ext = sext i1 %a to i32
112 store i32 %ext, i32 addrspace(3)* %out
; Test case: load a one-element <1 x i1> vector from addrspace(3), zero-extend
; it to <1 x i32>, and store the result to addrspace(3). The SICIVI runs must
; contain an "s_mov_b32 m0" after the kernel label.
116 ; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i32:
117 ; SICIVI: s_mov_b32 m0
119 define amdgpu_kernel void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
120 %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
121 %ext = zext <1 x i1> %load to <1 x i32>
122 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
; Test case: load a one-element <1 x i1> vector from addrspace(3), sign-extend
; it to <1 x i32>, and store the result to addrspace(3). The SICIVI runs must
; contain an "s_mov_b32 m0" after the kernel label.
126 ; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i32:
127 ; SICIVI: s_mov_b32 m0
129 define amdgpu_kernel void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
130 %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
131 %ext = sext <1 x i1> %load to <1 x i32>
132 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
; Test case: load <2 x i1> from addrspace(3), zero-extend each element to i32,
; and store the <2 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
136 ; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i32:
137 ; SICIVI: s_mov_b32 m0
139 define amdgpu_kernel void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
140 %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
141 %ext = zext <2 x i1> %load to <2 x i32>
142 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
; Test case: load <2 x i1> from addrspace(3), sign-extend each element to i32,
; and store the <2 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
146 ; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i32:
147 ; SICIVI: s_mov_b32 m0
149 define amdgpu_kernel void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
150 %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
151 %ext = sext <2 x i1> %load to <2 x i32>
152 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
; Test case: load <3 x i1> from addrspace(3), zero-extend each element to i32,
; and store the <3 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
156 ; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i32:
157 ; SICIVI: s_mov_b32 m0
159 define amdgpu_kernel void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
160 %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
161 %ext = zext <3 x i1> %load to <3 x i32>
162 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
; Test case: load <3 x i1> from addrspace(3), sign-extend each element to i32,
; and store the <3 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
166 ; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i32:
167 ; SICIVI: s_mov_b32 m0
169 define amdgpu_kernel void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
170 %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
171 %ext = sext <3 x i1> %load to <3 x i32>
172 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
; Test case: load <4 x i1> from addrspace(3), zero-extend each element to i32,
; and store the <4 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
176 ; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i32:
177 ; SICIVI: s_mov_b32 m0
179 define amdgpu_kernel void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
180 %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
181 %ext = zext <4 x i1> %load to <4 x i32>
182 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
; Test case: load <4 x i1> from addrspace(3), sign-extend each element to i32,
; and store the <4 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
186 ; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i32:
187 ; SICIVI: s_mov_b32 m0
189 define amdgpu_kernel void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
190 %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
191 %ext = sext <4 x i1> %load to <4 x i32>
192 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
; Test case: load <8 x i1> from addrspace(3), zero-extend each element to i32,
; and store the <8 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
196 ; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i32:
197 ; SICIVI: s_mov_b32 m0
199 define amdgpu_kernel void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
200 %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
201 %ext = zext <8 x i1> %load to <8 x i32>
202 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
; Test case: load <8 x i1> from addrspace(3), sign-extend each element to i32,
; and store the <8 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
206 ; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i32:
207 ; SICIVI: s_mov_b32 m0
209 define amdgpu_kernel void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
210 %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
211 %ext = sext <8 x i1> %load to <8 x i32>
212 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
; Test case: load <16 x i1> from addrspace(3), zero-extend each element to i32,
; and store the <16 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
216 ; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i32:
217 ; SICIVI: s_mov_b32 m0
219 define amdgpu_kernel void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
220 %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
221 %ext = zext <16 x i1> %load to <16 x i32>
222 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
; Test case: load <16 x i1> from addrspace(3), sign-extend each element to i32,
; and store the <16 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
226 ; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i32:
227 ; SICIVI: s_mov_b32 m0
229 define amdgpu_kernel void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
230 %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
231 %ext = sext <16 x i1> %load to <16 x i32>
232 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
; Test case: load <32 x i1> from addrspace(3), zero-extend each element to i32,
; and store the <32 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
236 ; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i32:
237 ; SICIVI: s_mov_b32 m0
239 define amdgpu_kernel void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
240 %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
241 %ext = zext <32 x i1> %load to <32 x i32>
242 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
; Test case: load <32 x i1> from addrspace(3), sign-extend each element to i32,
; and store the <32 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
246 ; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i32:
247 ; SICIVI: s_mov_b32 m0
249 define amdgpu_kernel void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
250 %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
251 %ext = sext <32 x i1> %load to <32 x i32>
252 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
; Test case: load <64 x i1> from addrspace(3), zero-extend each element to i32,
; and store the <64 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
256 ; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i32:
257 ; SICIVI: s_mov_b32 m0
259 define amdgpu_kernel void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
260 %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
261 %ext = zext <64 x i1> %load to <64 x i32>
262 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
; Test case: load <64 x i1> from addrspace(3), sign-extend each element to i32,
; and store the <64 x i32> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
266 ; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i32:
267 ; SICIVI: s_mov_b32 m0
269 define amdgpu_kernel void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
270 %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
271 %ext = sext <64 x i1> %load to <64 x i32>
272 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
; Test case: load a scalar i1 from addrspace(3) pointer %in, zero-extend it to
; i64, and store the i64 to addrspace(3) pointer %out.
; Checks: SICIVI runs need an "s_mov_b32 m0". The two GCN checks use the DAG
; form, so the ds_read_u8 (destination captured as LOAD) and the v_mov_b32_e32
; of literal 0 may appear in either order; the zero is presumably the high
; half of the 64-bit result -- confirm.
276 ; FUNC-LABEL: {{^}}local_zextload_i1_to_i64:
277 ; SICIVI: s_mov_b32 m0
280 ; GCN-DAG: ds_read_u8 [[LOAD:v[0-9]+]],
281 ; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
283 define amdgpu_kernel void @local_zextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
284 %a = load i1, i1 addrspace(3)* %in
285 %ext = zext i1 %a to i64
286 store i64 %ext, i64 addrspace(3)* %out
; Test case: load a scalar i1 from addrspace(3) pointer %in, sign-extend it to
; i64, and store the i64 to addrspace(3) pointer %out.
; Checks: SICIVI runs need an "s_mov_b32 m0". GCN runs need, in this order, a
; ds_read_u8, a v_bfe_i32 with trailing operands 0 and 1 (destination captured
; as BFE), and a v_ashrrev_i32_e32 by 31 that reads BFE. The shift presumably
; produces the high half of the 64-bit result -- confirm.
290 ; FUNC-LABEL: {{^}}local_sextload_i1_to_i64:
291 ; SICIVI: s_mov_b32 m0
294 ; GCN: ds_read_u8 [[LOAD:v[0-9]+]],
295 ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
296 ; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
298 define amdgpu_kernel void @local_sextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
299 %a = load i1, i1 addrspace(3)* %in
300 %ext = sext i1 %a to i64
301 store i64 %ext, i64 addrspace(3)* %out
; Test case: load a one-element <1 x i1> vector from addrspace(3), zero-extend
; it to <1 x i64>, and store the result to addrspace(3). The SICIVI runs must
; contain an "s_mov_b32 m0" after the kernel label.
305 ; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i64:
306 ; SICIVI: s_mov_b32 m0
308 define amdgpu_kernel void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
309 %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
310 %ext = zext <1 x i1> %load to <1 x i64>
311 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
; Test case: load a one-element <1 x i1> vector from addrspace(3), sign-extend
; it to <1 x i64>, and store the result to addrspace(3). The SICIVI runs must
; contain an "s_mov_b32 m0" after the kernel label.
315 ; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i64:
316 ; SICIVI: s_mov_b32 m0
318 define amdgpu_kernel void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
319 %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
320 %ext = sext <1 x i1> %load to <1 x i64>
321 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
; Test case: load <2 x i1> from addrspace(3), zero-extend each element to i64,
; and store the <2 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
325 ; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i64:
326 ; SICIVI: s_mov_b32 m0
328 define amdgpu_kernel void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
329 %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
330 %ext = zext <2 x i1> %load to <2 x i64>
331 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
; Test case: load <2 x i1> from addrspace(3), sign-extend each element to i64,
; and store the <2 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
335 ; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i64:
336 ; SICIVI: s_mov_b32 m0
338 define amdgpu_kernel void @local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
339 %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
340 %ext = sext <2 x i1> %load to <2 x i64>
341 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
; Test case: load <3 x i1> from addrspace(3), zero-extend each element to i64,
; and store the <3 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
345 ; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i64:
346 ; SICIVI: s_mov_b32 m0
348 define amdgpu_kernel void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
349 %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
350 %ext = zext <3 x i1> %load to <3 x i64>
351 store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
; Test case: load <3 x i1> from addrspace(3), sign-extend each element to i64,
; and store the <3 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
355 ; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i64:
356 ; SICIVI: s_mov_b32 m0
358 define amdgpu_kernel void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
359 %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
360 %ext = sext <3 x i1> %load to <3 x i64>
361 store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
; Test case: load <4 x i1> from addrspace(3), zero-extend each element to i64,
; and store the <4 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
365 ; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i64:
366 ; SICIVI: s_mov_b32 m0
368 define amdgpu_kernel void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
369 %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
370 %ext = zext <4 x i1> %load to <4 x i64>
371 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
; Test case: load <4 x i1> from addrspace(3), sign-extend each element to i64,
; and store the <4 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
375 ; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i64:
376 ; SICIVI: s_mov_b32 m0
378 define amdgpu_kernel void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
379 %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
380 %ext = sext <4 x i1> %load to <4 x i64>
381 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
; Test case: load <8 x i1> from addrspace(3), zero-extend each element to i64,
; and store the <8 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
385 ; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i64:
386 ; SICIVI: s_mov_b32 m0
388 define amdgpu_kernel void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
389 %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
390 %ext = zext <8 x i1> %load to <8 x i64>
391 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
; Test case: load <8 x i1> from addrspace(3), sign-extend each element to i64,
; and store the <8 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
395 ; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i64:
396 ; SICIVI: s_mov_b32 m0
398 define amdgpu_kernel void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
399 %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
400 %ext = sext <8 x i1> %load to <8 x i64>
401 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
; Test case: load <16 x i1> from addrspace(3), zero-extend each element to i64,
; and store the <16 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
405 ; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i64:
406 ; SICIVI: s_mov_b32 m0
408 define amdgpu_kernel void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
409 %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
410 %ext = zext <16 x i1> %load to <16 x i64>
411 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
; Test case: load <16 x i1> from addrspace(3), sign-extend each element to i64,
; and store the <16 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
415 ; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i64:
416 ; SICIVI: s_mov_b32 m0
418 define amdgpu_kernel void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
419 %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
420 %ext = sext <16 x i1> %load to <16 x i64>
421 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
; Test case: load <32 x i1> from addrspace(3), zero-extend each element to i64,
; and store the <32 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
425 ; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i64:
426 ; SICIVI: s_mov_b32 m0
428 define amdgpu_kernel void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
429 %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
430 %ext = zext <32 x i1> %load to <32 x i64>
431 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
; Test case: load <32 x i1> from addrspace(3), sign-extend each element to i64,
; and store the <32 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
435 ; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i64:
436 ; SICIVI: s_mov_b32 m0
438 define amdgpu_kernel void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
439 %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
440 %ext = sext <32 x i1> %load to <32 x i64>
441 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
; Test case: load <64 x i1> from addrspace(3), zero-extend each element to i64,
; and store the <64 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
445 ; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i64:
446 ; SICIVI: s_mov_b32 m0
448 define amdgpu_kernel void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
449 %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
450 %ext = zext <64 x i1> %load to <64 x i64>
451 store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
; Test case: load <64 x i1> from addrspace(3), sign-extend each element to i64,
; and store the <64 x i64> result to addrspace(3). The SICIVI runs must contain
; an "s_mov_b32 m0" after the kernel label.
455 ; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i64:
456 ; SICIVI: s_mov_b32 m0
458 define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
459 %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
460 %ext = sext <64 x i1> %load to <64 x i64>
461 store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
465 attributes #0 = { nounwind }