; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s

; The instruction checks in this file are written with the GCN prefix, so that
; prefix must be passed to FileCheck on both invocations; without it only the
; function labels are matched and the load/store expectations are silently
; ignored.
; NOTE(review): the GCN-prefixed checks were not being exercised before this
; change - confirm they still match current llc output.
; Extracts the single element of the <1 x i8> kernel argument and stores it
; to %out. The checks expect one unsigned byte load followed by one byte store.
; FUNC-LABEL: {{^}}extract_vector_elt_v1i8:
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v1i8(i8 addrspace(1)* %out, <1 x i8> %foo) #0 {
  %p0 = extractelement <1 x i8> %foo, i32 0
  store i8 %p0, i8 addrspace(1)* %out
  ret void
}
; Extracts both elements of the <2 x i8> kernel argument and writes them back
; in swapped order: element 1 goes to %out and element 0 to %out + 1.
; The checks expect two unsigned byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v2i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v2i8(i8 addrspace(1)* %out, <2 x i8> %foo) #0 {
  %p0 = extractelement <2 x i8> %foo, i32 0
  %p1 = extractelement <2 x i8> %foo, i32 1
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}
; Extracts elements 0 and 2 of the <3 x i8> kernel argument; element 2 is
; stored to %out and element 0 to %out + 1.
; The checks expect two unsigned byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v3i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v3i8(i8 addrspace(1)* %out, <3 x i8> %foo) #0 {
  %p0 = extractelement <3 x i8> %foo, i32 0
  %p1 = extractelement <3 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}
; Extracts elements 0 and 2 of the <4 x i8> kernel argument; element 2 is
; stored to %out and element 0 to %out + 1.
; The checks expect two unsigned byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v4i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v4i8(i8 addrspace(1)* %out, <4 x i8> %foo) #0 {
  %p0 = extractelement <4 x i8> %foo, i32 0
  %p1 = extractelement <4 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}
; Extracts elements 0 and 2 of the <8 x i8> kernel argument; element 2 is
; stored to %out and element 0 to %out + 1.
; The checks expect two unsigned byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v8i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v8i8(i8 addrspace(1)* %out, <8 x i8> %foo) #0 {
  %p0 = extractelement <8 x i8> %foo, i32 0
  %p1 = extractelement <8 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}
; Extracts elements 0 and 2 of the <16 x i8> kernel argument; element 2 is
; stored to %out and element 0 to %out + 1.
; The checks expect two unsigned byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v16i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v16i8(i8 addrspace(1)* %out, <16 x i8> %foo) #0 {
  %p0 = extractelement <16 x i8> %foo, i32 0
  %p1 = extractelement <16 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}
; Extracts elements 0 and 2 of the <32 x i8> kernel argument; element 2 is
; stored to %out and element 0 to %out + 1.
; The checks expect two unsigned byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v32i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v32i8(i8 addrspace(1)* %out, <32 x i8> %foo) #0 {
  %p0 = extractelement <32 x i8> %foo, i32 0
  %p1 = extractelement <32 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}
; Extracts elements 0 and 2 of the <64 x i8> kernel argument; element 2 is
; stored to %out and element 0 to %out + 1.
; The checks expect two unsigned byte loads followed by two byte stores.
; FUNC-LABEL: {{^}}extract_vector_elt_v64i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @extract_vector_elt_v64i8(i8 addrspace(1)* %out, <64 x i8> %foo) #0 {
  %p0 = extractelement <64 x i8> %foo, i32 0
  %p1 = extractelement <64 x i8> %foo, i32 2
  %out1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
  store i8 %p1, i8 addrspace(1)* %out
  store i8 %p0, i8 addrspace(1)* %out1
  ret void
}
; Extracts one element of the <3 x i8> kernel argument at the run-time index
; %idx and stores it to %out.
; The checks expect three unsigned byte loads, three byte stores, and then a
; byte store / unsigned byte load / byte store sequence.
; NOTE(review): the middle group presumably writes the vector to memory so it
; can be indexed dynamically - confirm against the generated code.
; FUNC-LABEL: {{^}}dynamic_extract_vector_elt_v3i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define amdgpu_kernel void @dynamic_extract_vector_elt_v3i8(i8 addrspace(1)* %out, <3 x i8> %foo, i32 %idx) #0 {
  %p0 = extractelement <3 x i8> %foo, i32 %idx
  store i8 %p0, i8 addrspace(1)* %out
  ret void
}
; Extracts one element of the <4 x i8> kernel argument at the run-time index
; %idx and stores it to %out.
; The checks expect four unsigned byte loads, four byte stores, and then a
; byte store / unsigned byte load / byte store sequence.
; NOTE(review): the middle group presumably writes the vector to memory so it
; can be indexed dynamically - confirm against the generated code.
; FUNC-LABEL: {{^}}dynamic_extract_vector_elt_v4i8:
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_load_ubyte
; GCN: buffer_store_byte
define amdgpu_kernel void @dynamic_extract_vector_elt_v4i8(i8 addrspace(1)* %out, <4 x i8> %foo, i32 %idx) #0 {
  %p0 = extractelement <4 x i8> %foo, i32 %idx
  store i8 %p0, i8 addrspace(1)* %out
  ret void
}
; Attribute group referenced as #0 by the kernel definitions in this file.
attributes #0 = { nounwind }