1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Kernel that loads a single i1 through one addrspace(1) pointer and stores it
; through another. The amdgcn runs expect, in order: a buffer_load_ubyte, a
; v_and_b32_e32 with the constant 1, and a buffer_store_byte.
5 ; FUNC-LABEL: {{^}}global_load_i1:
6 ; GCN: buffer_load_ubyte
7 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
8 ; GCN: buffer_store_byte
12 define amdgpu_kernel void @global_load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
13 %load = load i1, i1 addrspace(1)* %in
14 store i1 %load, i1 addrspace(1)* %out
; Kernel that copies a <2 x i1> value between two addrspace(1) pointers.
; Only the function label is checked in the visible lines; no instruction
; patterns are pinned.
18 ; FUNC-LABEL: {{^}}global_load_v2i1:
19 define amdgpu_kernel void @global_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
20 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
21 store <2 x i1> %load, <2 x i1> addrspace(1)* %out
; Kernel that copies a <3 x i1> value between two addrspace(1) pointers.
; Only the function label is checked in the visible lines; no instruction
; patterns are pinned.
25 ; FUNC-LABEL: {{^}}global_load_v3i1:
26 define amdgpu_kernel void @global_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
27 %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
28 store <3 x i1> %load, <3 x i1> addrspace(1)* %out
; Kernel that copies a <4 x i1> value between two addrspace(1) pointers.
; Only the function label is checked in the visible lines; no instruction
; patterns are pinned.
32 ; FUNC-LABEL: {{^}}global_load_v4i1:
33 define amdgpu_kernel void @global_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
34 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
35 store <4 x i1> %load, <4 x i1> addrspace(1)* %out
; Kernel that copies an <8 x i1> value between two addrspace(1) pointers.
; Only the function label is checked in the visible lines; no instruction
; patterns are pinned.
39 ; FUNC-LABEL: {{^}}global_load_v8i1:
40 define amdgpu_kernel void @global_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
41 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
42 store <8 x i1> %load, <8 x i1> addrspace(1)* %out
; Kernel that copies a <16 x i1> value between two addrspace(1) pointers.
; Only the function label is checked in the visible lines; no instruction
; patterns are pinned.
46 ; FUNC-LABEL: {{^}}global_load_v16i1:
47 define amdgpu_kernel void @global_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
48 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
49 store <16 x i1> %load, <16 x i1> addrspace(1)* %out
; Kernel that copies a <32 x i1> value between two addrspace(1) pointers.
; Only the function label is checked in the visible lines; no instruction
; patterns are pinned.
53 ; FUNC-LABEL: {{^}}global_load_v32i1:
54 define amdgpu_kernel void @global_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
55 %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
56 store <32 x i1> %load, <32 x i1> addrspace(1)* %out
; Kernel that copies a <64 x i1> value between two addrspace(1) pointers.
; Only the function label is checked in the visible lines; no instruction
; patterns are pinned.
60 ; FUNC-LABEL: {{^}}global_load_v64i1:
61 define amdgpu_kernel void @global_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
62 %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
63 store <64 x i1> %load, <64 x i1> addrspace(1)* %out
; Kernel that loads an i1 from addrspace(1), zero-extends it to i32 and stores
; the i32 to addrspace(1). The amdgcn runs expect a buffer_load_ubyte followed
; by a buffer_store_dword.
67 ; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
68 ; GCN: buffer_load_ubyte
69 ; GCN: buffer_store_dword
70 define amdgpu_kernel void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
71 %a = load i1, i1 addrspace(1)* %in
72 %ext = zext i1 %a to i32
73 store i32 %ext, i32 addrspace(1)* %out
; Kernel that loads an i1 from addrspace(1), sign-extends it to i32 and stores
; the i32 to addrspace(1). The amdgcn runs expect, in order: a
; buffer_load_ubyte, a v_bfe_i32 whose last two operands are 0 and 1 (anchored
; to end of line), and a buffer_store_dword.
77 ; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
78 ; GCN: buffer_load_ubyte
79 ; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
80 ; GCN: buffer_store_dword
84 define amdgpu_kernel void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
85 %a = load i1, i1 addrspace(1)* %in
86 %ext = sext i1 %a to i32
87 store i32 %ext, i32 addrspace(1)* %out
; Kernel that loads a <1 x i1> from addrspace(1), zero-extends it to
; <1 x i32> and stores the result. Only the function label is checked in the
; visible lines.
91 ; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i32:
92 define amdgpu_kernel void @global_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
93 %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
94 %ext = zext <1 x i1> %load to <1 x i32>
95 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
; Kernel that loads a <1 x i1> from addrspace(1), sign-extends it to
; <1 x i32> and stores the result. Only the function label is checked in the
; visible lines.
99 ; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i32:
100 define amdgpu_kernel void @global_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
101 %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
102 %ext = sext <1 x i1> %load to <1 x i32>
103 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
; Kernel that loads a <2 x i1> from addrspace(1), zero-extends it to
; <2 x i32> and stores the result. Only the function label is checked in the
; visible lines.
107 ; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i32:
108 define amdgpu_kernel void @global_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
109 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
110 %ext = zext <2 x i1> %load to <2 x i32>
111 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
; Kernel that loads a <2 x i1> from addrspace(1), sign-extends it to
; <2 x i32> and stores the result. Only the function label is checked in the
; visible lines.
115 ; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i32:
116 define amdgpu_kernel void @global_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
117 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
118 %ext = sext <2 x i1> %load to <2 x i32>
119 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
; Kernel that loads a <3 x i1> from addrspace(1), zero-extends it to
; <3 x i32> and stores the result. Only the function label is checked in the
; visible lines.
123 ; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i32:
124 define amdgpu_kernel void @global_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
125 %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
126 %ext = zext <3 x i1> %load to <3 x i32>
127 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
; Kernel that loads a <3 x i1> from addrspace(1), sign-extends it to
; <3 x i32> and stores the result. Only the function label is checked in the
; visible lines.
131 ; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i32:
132 define amdgpu_kernel void @global_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
133 %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
134 %ext = sext <3 x i1> %load to <3 x i32>
135 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
; Kernel that loads a <4 x i1> from addrspace(1), zero-extends it to
; <4 x i32> and stores the result. Only the function label is checked in the
; visible lines.
139 ; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i32:
140 define amdgpu_kernel void @global_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
141 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
142 %ext = zext <4 x i1> %load to <4 x i32>
143 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
; Kernel that loads a <4 x i1> from addrspace(1), sign-extends it to
; <4 x i32> and stores the result. Only the function label is checked in the
; visible lines.
147 ; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i32:
148 define amdgpu_kernel void @global_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
149 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
150 %ext = sext <4 x i1> %load to <4 x i32>
151 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
; Kernel that loads an <8 x i1> from addrspace(1), zero-extends it to
; <8 x i32> and stores the result. Only the function label is checked in the
; visible lines.
155 ; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i32:
156 define amdgpu_kernel void @global_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
157 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
158 %ext = zext <8 x i1> %load to <8 x i32>
159 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
; Kernel that loads an <8 x i1> from addrspace(1), sign-extends it to
; <8 x i32> and stores the result. Only the function label is checked in the
; visible lines.
163 ; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i32:
164 define amdgpu_kernel void @global_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
165 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
166 %ext = sext <8 x i1> %load to <8 x i32>
167 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
; Kernel that loads a <16 x i1> from addrspace(1), zero-extends it to
; <16 x i32> and stores the result. Only the function label is checked in the
; visible lines.
171 ; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i32:
172 define amdgpu_kernel void @global_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
173 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
174 %ext = zext <16 x i1> %load to <16 x i32>
175 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
; Kernel that loads a <16 x i1> from addrspace(1), sign-extends it to
; <16 x i32> and stores the result. Only the function label is checked in the
; visible lines.
179 ; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i32:
180 define amdgpu_kernel void @global_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
181 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
182 %ext = sext <16 x i1> %load to <16 x i32>
183 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
; Kernel that loads a <32 x i1> from addrspace(1), zero-extends it to
; <32 x i32> and stores the result. Only the function label is checked in the
; visible lines.
187 ; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i32:
188 define amdgpu_kernel void @global_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
189 %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
190 %ext = zext <32 x i1> %load to <32 x i32>
191 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; Kernel that loads a <32 x i1> from addrspace(1), sign-extends it to
; <32 x i32> and stores the result. Only the function label is checked in the
; visible lines.
195 ; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i32:
196 define amdgpu_kernel void @global_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
197 %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
198 %ext = sext <32 x i1> %load to <32 x i32>
199 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; Kernel that loads a <64 x i1> from addrspace(1), zero-extends it to
; <64 x i32> and stores the result. Only the function label is checked in the
; visible lines.
203 ; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i32:
204 define amdgpu_kernel void @global_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
205 %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
206 %ext = zext <64 x i1> %load to <64 x i32>
207 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; Kernel that loads a <64 x i1> from addrspace(1), sign-extends it to
; <64 x i32> and stores the result. Only the function label is checked in the
; visible lines.
211 ; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i32:
212 define amdgpu_kernel void @global_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
213 %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
214 %ext = sext <64 x i1> %load to <64 x i32>
215 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; Kernel that loads an i1 from addrspace(1), zero-extends it to i64 and stores
; the i64 to addrspace(1). The amdgcn runs accept the following three
; instructions in any relative order: a buffer_load_ubyte whose destination is
; captured as LOAD, a v_mov_b32_e32 of the constant 0, and a v_and_b32_e32 of
; the constant 1 with LOAD. A buffer_store_dwordx2 must follow them.
219 ; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
220 ; GCN-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
221 ; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
222 ; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]{{$}}
223 ; GCN: buffer_store_dwordx2
224 define amdgpu_kernel void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
225 %a = load i1, i1 addrspace(1)* %in
226 %ext = zext i1 %a to i64
227 store i64 %ext, i64 addrspace(1)* %out
; Kernel that loads an i1 from addrspace(1), sign-extends it to i64 and stores
; the i64 to addrspace(1). The amdgcn runs expect, in order: a
; buffer_load_ubyte, a v_bfe_i32 (last two operands 0 and 1) whose destination
; is captured as BFE, a v_ashrrev_i32_e32 of BFE by 31, and a
; buffer_store_dwordx2.
; NOTE(review): the load destination is captured as LOAD but is not referenced
; by any later pattern in this block; the v_bfe_i32 source is matched by a
; generic register regex instead. Confirm whether that looseness is intended.
231 ; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
232 ; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
233 ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
234 ; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
235 ; GCN: buffer_store_dwordx2
236 define amdgpu_kernel void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
237 %a = load i1, i1 addrspace(1)* %in
238 %ext = sext i1 %a to i64
239 store i64 %ext, i64 addrspace(1)* %out
; Kernel that loads a <1 x i1> from addrspace(1), zero-extends it to
; <1 x i64> and stores the result. Only the function label is checked in the
; visible lines.
243 ; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i64:
244 define amdgpu_kernel void @global_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
245 %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
246 %ext = zext <1 x i1> %load to <1 x i64>
247 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
; Kernel that loads a <1 x i1> from addrspace(1), sign-extends it to
; <1 x i64> and stores the result. Only the function label is checked in the
; visible lines.
251 ; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i64:
252 define amdgpu_kernel void @global_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
253 %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
254 %ext = sext <1 x i1> %load to <1 x i64>
255 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
; Kernel that loads a <2 x i1> from addrspace(1), zero-extends it to
; <2 x i64> and stores the result. Only the function label is checked in the
; visible lines.
259 ; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i64:
260 define amdgpu_kernel void @global_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
261 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
262 %ext = zext <2 x i1> %load to <2 x i64>
263 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
; Kernel that loads a <2 x i1> from addrspace(1), sign-extends it to
; <2 x i64> and stores the result. Only the function label is checked in the
; visible lines.
267 ; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i64:
268 define amdgpu_kernel void @global_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
269 %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
270 %ext = sext <2 x i1> %load to <2 x i64>
271 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
; Kernel that loads a <3 x i1> from addrspace(1), zero-extends it to
; <3 x i64> and stores the result. Only the function label is checked in the
; visible lines.
275 ; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i64:
276 define amdgpu_kernel void @global_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
277 %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
278 %ext = zext <3 x i1> %load to <3 x i64>
279 store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
; Kernel that loads a <3 x i1> from addrspace(1), sign-extends it to
; <3 x i64> and stores the result. Only the function label is checked in the
; visible lines.
283 ; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i64:
284 define amdgpu_kernel void @global_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
285 %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
286 %ext = sext <3 x i1> %load to <3 x i64>
287 store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
; Kernel that loads a <4 x i1> from addrspace(1), zero-extends it to
; <4 x i64> and stores the result. Only the function label is checked in the
; visible lines.
291 ; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i64:
292 define amdgpu_kernel void @global_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
293 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
294 %ext = zext <4 x i1> %load to <4 x i64>
295 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
; Kernel that loads a <4 x i1> from addrspace(1), sign-extends it to
; <4 x i64> and stores the result. Only the function label is checked in the
; visible lines.
299 ; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i64:
300 define amdgpu_kernel void @global_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
301 %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
302 %ext = sext <4 x i1> %load to <4 x i64>
303 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
; Kernel that loads an <8 x i1> from addrspace(1), zero-extends it to
; <8 x i64> and stores the result. Only the function label is checked in the
; visible lines.
307 ; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i64:
308 define amdgpu_kernel void @global_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
309 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
310 %ext = zext <8 x i1> %load to <8 x i64>
311 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
; Kernel that loads an <8 x i1> from addrspace(1), sign-extends it to
; <8 x i64> and stores the result. Only the function label is checked in the
; visible lines.
315 ; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i64:
316 define amdgpu_kernel void @global_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
317 %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
318 %ext = sext <8 x i1> %load to <8 x i64>
319 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
; Kernel that loads a <16 x i1> from addrspace(1), zero-extends it to
; <16 x i64> and stores the result. Only the function label is checked in the
; visible lines.
323 ; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i64:
324 define amdgpu_kernel void @global_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
325 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
326 %ext = zext <16 x i1> %load to <16 x i64>
327 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
; Kernel that loads a <16 x i1> from addrspace(1), sign-extends it to
; <16 x i64> and stores the result. Only the function label is checked in the
; visible lines.
331 ; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i64:
332 define amdgpu_kernel void @global_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
333 %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
334 %ext = sext <16 x i1> %load to <16 x i64>
335 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
; Kernel that loads a <32 x i1> from addrspace(1), zero-extends it to
; <32 x i64> and stores the result. Only the function label is checked in the
; visible lines.
339 ; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i64:
340 define amdgpu_kernel void @global_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
341 %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
342 %ext = zext <32 x i1> %load to <32 x i64>
343 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; Kernel that loads a <32 x i1> from addrspace(1), sign-extends it to
; <32 x i64> and stores the result. Only the function label is checked in the
; visible lines.
347 ; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i64:
348 define amdgpu_kernel void @global_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
349 %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
350 %ext = sext <32 x i1> %load to <32 x i64>
351 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; Kernel that loads a <64 x i1> from addrspace(1), zero-extends it to
; <64 x i64> and stores the result. Only the function label is checked in the
; visible lines.
355 ; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i64:
356 define amdgpu_kernel void @global_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
357 %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
358 %ext = zext <64 x i1> %load to <64 x i64>
359 store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; Kernel that loads a <64 x i1> from addrspace(1), sign-extends it to
; <64 x i64> and stores the result. Only the function label is checked in the
; visible lines.
363 ; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i64:
364 define amdgpu_kernel void @global_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
365 %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
366 %ext = sext <64 x i1> %load to <64 x i64>
367 store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; Attribute group #0, referenced by every kernel definition visible in this
; file; it marks those functions nounwind.
371 attributes #0 = { nounwind }