1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s
; Deinterleaves a <vscale x 4 x half> input into two <vscale x 2 x half>
; results. The expected code first unpacks the input into high and low halves
; (uunpkhi/uunpklo, .s -> .d) and then applies uzp1/uzp2 on .d elements.
4 define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
5 ; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
7 ; CHECK-NEXT: uunpkhi z1.d, z0.s
8 ; CHECK-NEXT: uunpklo z2.d, z0.s
9 ; CHECK-NEXT: uzp1 z0.d, z2.d, z1.d
10 ; CHECK-NEXT: uzp2 z1.d, z2.d, z1.d
12 %retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.experimental.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
13 ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
; Deinterleaves a <vscale x 8 x half> input into two <vscale x 4 x half>
; results. The expected code unpacks the input (uunpkhi/uunpklo, .h -> .s)
; and then applies uzp1/uzp2 on .s elements.
16 define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
17 ; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
19 ; CHECK-NEXT: uunpkhi z1.s, z0.h
20 ; CHECK-NEXT: uunpklo z2.s, z0.h
21 ; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s
22 ; CHECK-NEXT: uzp2 z1.s, z2.s, z1.s
24 %retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.experimental.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
25 ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
; Deinterleaves a <vscale x 16 x half> input into two <vscale x 8 x half>
; results. The expected code applies uzp1/uzp2 on .h elements directly to the
; two input registers (z0, z1); the extra mov places the uzp1 result in z0.
28 define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
29 ; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
31 ; CHECK-NEXT: uzp1 z2.h, z0.h, z1.h
32 ; CHECK-NEXT: uzp2 z1.h, z0.h, z1.h
33 ; CHECK-NEXT: mov z0.d, z2.d
35 %retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
36 ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
; Deinterleaves a <vscale x 4 x float> input into two <vscale x 2 x float>
; results. The expected code unpacks the input (uunpkhi/uunpklo, .s -> .d)
; and then applies uzp1/uzp2 on .d elements.
39 define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
40 ; CHECK-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
42 ; CHECK-NEXT: uunpkhi z1.d, z0.s
43 ; CHECK-NEXT: uunpklo z2.d, z0.s
44 ; CHECK-NEXT: uzp1 z0.d, z2.d, z1.d
45 ; CHECK-NEXT: uzp2 z1.d, z2.d, z1.d
47 %retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.experimental.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
48 ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
; Deinterleaves a <vscale x 8 x float> input into two <vscale x 4 x float>
; results. The expected code applies uzp1/uzp2 on .s elements to the two
; input registers (z0, z1); the extra mov places the uzp1 result in z0.
51 define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
52 ; CHECK-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
54 ; CHECK-NEXT: uzp1 z2.s, z0.s, z1.s
55 ; CHECK-NEXT: uzp2 z1.s, z0.s, z1.s
56 ; CHECK-NEXT: mov z0.d, z2.d
58 %retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
59 ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
; Deinterleaves a <vscale x 4 x double> input into two <vscale x 2 x double>
; results. The expected code applies uzp1/uzp2 on .d elements to the two
; input registers (z0, z1); the extra mov places the uzp1 result in z0.
62 define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
63 ; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
65 ; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
66 ; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
67 ; CHECK-NEXT: mov z0.d, z2.d
69 %retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
70 ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
; Deinterleaves a <vscale x 32 x i8> input into two <vscale x 16 x i8>
; results. The expected code applies uzp1/uzp2 on .b elements to the two
; input registers (z0, z1); the extra mov places the uzp1 result in z0.
75 define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv32i8(<vscale x 32 x i8> %vec) {
76 ; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv32i8:
78 ; CHECK-NEXT: uzp1 z2.b, z0.b, z1.b
79 ; CHECK-NEXT: uzp2 z1.b, z0.b, z1.b
80 ; CHECK-NEXT: mov z0.d, z2.d
82 %retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
83 ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval
; Deinterleaves a <vscale x 16 x i16> input into two <vscale x 8 x i16>
; results. The expected code applies uzp1/uzp2 on .h elements to the two
; input registers (z0, z1); the extra mov places the uzp1 result in z0.
86 define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv16i16(<vscale x 16 x i16> %vec) {
87 ; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv16i16:
89 ; CHECK-NEXT: uzp1 z2.h, z0.h, z1.h
90 ; CHECK-NEXT: uzp2 z1.h, z0.h, z1.h
91 ; CHECK-NEXT: mov z0.d, z2.d
93 %retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %vec)
94 ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %retval
; Deinterleaves a <vscale x 8 x i32> input into two <vscale x 4 x i32>
; results. The expected code applies uzp1/uzp2 on .s elements to the two
; input registers (z0, z1); the extra mov places the uzp1 result in z0.
; NOTE(review): "nxvv8i32" in the function name looks like a typo for
; "nxv8i32" (compare the sibling test names); left unchanged here so the
; function name and its label check stay in sync - confirm before renaming.
97 define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxvv8i32(<vscale x 8 x i32> %vec) {
98 ; CHECK-LABEL: vector_deinterleave_nxv4i32_nxvv8i32:
100 ; CHECK-NEXT: uzp1 z2.s, z0.s, z1.s
101 ; CHECK-NEXT: uzp2 z1.s, z0.s, z1.s
102 ; CHECK-NEXT: mov z0.d, z2.d
104 %retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %vec)
105 ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
; Deinterleaves a <vscale x 4 x i64> input into two <vscale x 2 x i64>
; results. The expected code applies uzp1/uzp2 on .d elements to the two
; input registers (z0, z1); the extra mov places the uzp1 result in z0.
108 define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
109 ; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
111 ; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
112 ; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
113 ; CHECK-NEXT: mov z0.d, z2.d
115 %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
116 ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
; Deinterleaves a <vscale x 32 x i1> input into two <vscale x 16 x i1>
; results. The expected code works on predicate registers: uzp1/uzp2 on .b
; elements of p0 and p1, then a mov that places the uzp1 result in p0.
120 define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
121 ; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
123 ; CHECK-NEXT: uzp1 p2.b, p0.b, p1.b
124 ; CHECK-NEXT: uzp2 p1.b, p0.b, p1.b
125 ; CHECK-NEXT: mov p0.b, p2.b
127 %retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
128 ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval
; Deinterleaves a <vscale x 16 x i1> input into two <vscale x 8 x i1>
; results. The expected code unpacks the predicate into high and low halves
; (punpkhi/punpklo, .b -> .h) and then applies uzp1/uzp2 on .h elements.
131 define {<vscale x 8 x i1>, <vscale x 8 x i1>} @vector_deinterleave_nxv8i1_nxv16i1(<vscale x 16 x i1> %vec) {
132 ; CHECK-LABEL: vector_deinterleave_nxv8i1_nxv16i1:
134 ; CHECK-NEXT: punpkhi p1.h, p0.b
135 ; CHECK-NEXT: punpklo p2.h, p0.b
136 ; CHECK-NEXT: uzp1 p0.h, p2.h, p1.h
137 ; CHECK-NEXT: uzp2 p1.h, p2.h, p1.h
139 %retval = call {<vscale x 8 x i1>, <vscale x 8 x i1>} @llvm.experimental.vector.deinterleave2.nxv16i1(<vscale x 16 x i1> %vec)
140 ret {<vscale x 8 x i1>, <vscale x 8 x i1>} %retval
; Deinterleaves a <vscale x 8 x i1> input into two <vscale x 4 x i1>
; results. The expected code unpacks the predicate with punpkhi/punpklo
; (written with .b source and .h destination) and then applies uzp1/uzp2 on
; .s elements.
143 define {<vscale x 4 x i1>, <vscale x 4 x i1>} @vector_deinterleave_nxv4i1_nxv8i1(<vscale x 8 x i1> %vec) {
144 ; CHECK-LABEL: vector_deinterleave_nxv4i1_nxv8i1:
146 ; CHECK-NEXT: punpkhi p1.h, p0.b
147 ; CHECK-NEXT: punpklo p2.h, p0.b
148 ; CHECK-NEXT: uzp1 p0.s, p2.s, p1.s
149 ; CHECK-NEXT: uzp2 p1.s, p2.s, p1.s
151 %retval = call {<vscale x 4 x i1>, <vscale x 4 x i1>} @llvm.experimental.vector.deinterleave2.nxv8i1(<vscale x 8 x i1> %vec)
152 ret {<vscale x 4 x i1>, <vscale x 4 x i1>} %retval
; Deinterleaves a <vscale x 4 x i1> input into two <vscale x 2 x i1>
; results. The expected code unpacks the predicate with punpkhi/punpklo
; (written with .b source and .h destination) and then applies uzp1/uzp2 on
; .d elements.
155 define {<vscale x 2 x i1>, <vscale x 2 x i1>} @vector_deinterleave_nxv2i1_nxv4i1(<vscale x 4 x i1> %vec) {
156 ; CHECK-LABEL: vector_deinterleave_nxv2i1_nxv4i1:
158 ; CHECK-NEXT: punpkhi p1.h, p0.b
159 ; CHECK-NEXT: punpklo p2.h, p0.b
160 ; CHECK-NEXT: uzp1 p0.d, p2.d, p1.d
161 ; CHECK-NEXT: uzp2 p1.d, p2.d, p1.d
163 %retval = call {<vscale x 2 x i1>, <vscale x 2 x i1>} @llvm.experimental.vector.deinterleave2.nxv4i1(<vscale x 4 x i1> %vec)
164 ret {<vscale x 2 x i1>, <vscale x 2 x i1>} %retval
168 ; Split illegal types
; Deinterleaves a <vscale x 8 x i64> input into two <vscale x 4 x i64>
; results. The input occupies four registers (z0-z3); the expected code pairs
; them up (z0/z1 and z2/z3) with two uzp1 and two uzp2 on .d elements, then
; uses movs to arrange the results in z0-z3.
170 define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv8i64(<vscale x 8 x i64> %vec) {
171 ; CHECK-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
173 ; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
174 ; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
175 ; CHECK-NEXT: uzp2 z6.d, z0.d, z1.d
176 ; CHECK-NEXT: uzp2 z3.d, z2.d, z3.d
177 ; CHECK-NEXT: mov z0.d, z5.d
178 ; CHECK-NEXT: mov z1.d, z4.d
179 ; CHECK-NEXT: mov z2.d, z6.d
181 %retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
182 ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
; Deinterleaves a <vscale x 16 x i64> input into two <vscale x 8 x i64>
; results. The input occupies eight registers (z0-z7); the expected code
; pairs them up (z0/z1, z2/z3, z4/z5, z6/z7) with four uzp1 and four uzp2 on
; .d elements, then uses movs to arrange the results in z0-z7.
185 define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv16i64(<vscale x 16 x i64> %vec) {
186 ; CHECK-LABEL: vector_deinterleave_nxv8i64_nxv16i64:
188 ; CHECK-NEXT: uzp1 z24.d, z2.d, z3.d
189 ; CHECK-NEXT: uzp1 z25.d, z0.d, z1.d
190 ; CHECK-NEXT: uzp1 z26.d, z4.d, z5.d
191 ; CHECK-NEXT: uzp1 z27.d, z6.d, z7.d
192 ; CHECK-NEXT: uzp2 z28.d, z0.d, z1.d
193 ; CHECK-NEXT: uzp2 z29.d, z2.d, z3.d
194 ; CHECK-NEXT: uzp2 z30.d, z4.d, z5.d
195 ; CHECK-NEXT: uzp2 z7.d, z6.d, z7.d
196 ; CHECK-NEXT: mov z0.d, z25.d
197 ; CHECK-NEXT: mov z1.d, z24.d
198 ; CHECK-NEXT: mov z2.d, z26.d
199 ; CHECK-NEXT: mov z3.d, z27.d
200 ; CHECK-NEXT: mov z4.d, z28.d
201 ; CHECK-NEXT: mov z5.d, z29.d
202 ; CHECK-NEXT: mov z6.d, z30.d
204 %retval = call {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.experimental.vector.deinterleave2.nxv16i64(<vscale x 16 x i64> %vec)
205 ret {<vscale x 8 x i64>, <vscale x 8 x i64>} %retval
209 ; Promote illegal element types
; Deinterleaves a <vscale x 16 x i8> input into two <vscale x 8 x i8>
; results. The expected code widens the input into high and low halves
; (uunpkhi/uunpklo, .b -> .h) and then applies uzp1/uzp2 on .h elements.
211 define {<vscale x 8 x i8>, <vscale x 8 x i8>} @vector_deinterleave_nxv8i8_nxv16i8(<vscale x 16 x i8> %vec) {
212 ; CHECK-LABEL: vector_deinterleave_nxv8i8_nxv16i8:
214 ; CHECK-NEXT: uunpkhi z1.h, z0.b
215 ; CHECK-NEXT: uunpklo z2.h, z0.b
216 ; CHECK-NEXT: uzp1 z0.h, z2.h, z1.h
217 ; CHECK-NEXT: uzp2 z1.h, z2.h, z1.h
219 %retval = call {<vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.experimental.vector.deinterleave2.nxv16i8(<vscale x 16 x i8> %vec)
220 ret {<vscale x 8 x i8>, <vscale x 8 x i8>} %retval
; Deinterleaves a <vscale x 8 x i16> input into two <vscale x 4 x i16>
; results. The expected code widens the input into high and low halves
; (uunpkhi/uunpklo, .h -> .s) and then applies uzp1/uzp2 on .s elements.
223 define {<vscale x 4 x i16>, <vscale x 4 x i16>} @vector_deinterleave_nxv4i16_nxv8i16(<vscale x 8 x i16> %vec) {
224 ; CHECK-LABEL: vector_deinterleave_nxv4i16_nxv8i16:
226 ; CHECK-NEXT: uunpkhi z1.s, z0.h
227 ; CHECK-NEXT: uunpklo z2.s, z0.h
228 ; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s
229 ; CHECK-NEXT: uzp2 z1.s, z2.s, z1.s
231 %retval = call {<vscale x 4 x i16>, <vscale x 4 x i16>} @llvm.experimental.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %vec)
232 ret {<vscale x 4 x i16>, <vscale x 4 x i16>} %retval
; Deinterleaves a <vscale x 4 x i32> input into two <vscale x 2 x i32>
; results. The expected code widens the input into high and low halves
; (uunpkhi/uunpklo, .s -> .d) and then applies uzp1/uzp2 on .d elements.
235 define {<vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_nxv2i32_nxv4i32(<vscale x 4 x i32> %vec) {
236 ; CHECK-LABEL: vector_deinterleave_nxv2i32_nxv4i32:
238 ; CHECK-NEXT: uunpkhi z1.d, z0.s
239 ; CHECK-NEXT: uunpklo z2.d, z0.s
240 ; CHECK-NEXT: uzp1 z0.d, z2.d, z1.d
241 ; CHECK-NEXT: uzp2 z1.d, z2.d, z1.d
243 %retval = call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %vec)
244 ret {<vscale x 2 x i32>, <vscale x 2 x i32>} %retval
248 ; Floating-point declarations
; Intrinsic declarations used by the half, float and double tests in this file.
249 declare {<vscale x 2 x half>,<vscale x 2 x half>} @llvm.experimental.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
250 declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.experimental.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
251 declare {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.experimental.vector.deinterleave2.nxv4f32(<vscale x 4 x float>)
252 declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half>)
253 declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
254 declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
256 ; Integer declarations
; Intrinsic declarations used by the nxv32i8, nxv16i16, nxv8i32 and nxv4i64
; input tests in this file.
257 declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
258 declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
259 declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
260 declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)
262 ; Predicate (i1 vector) declarations
; Intrinsic declarations used by the <vscale x N x i1> tests in this file.
263 declare {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1>)
264 declare {<vscale x 8 x i1>, <vscale x 8 x i1>} @llvm.experimental.vector.deinterleave2.nxv16i1(<vscale x 16 x i1>)
265 declare {<vscale x 4 x i1>, <vscale x 4 x i1>} @llvm.experimental.vector.deinterleave2.nxv8i1(<vscale x 8 x i1>)
266 declare {<vscale x 2 x i1>, <vscale x 2 x i1>} @llvm.experimental.vector.deinterleave2.nxv4i1(<vscale x 4 x i1>)
; Declarations used by the nxv8i64 and nxv16i64 input tests in this file
; (vector_deinterleave_nxv4i64_nxv8i64 and vector_deinterleave_nxv8i64_nxv16i64).
269 declare {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64>)
270 declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.experimental.vector.deinterleave2.nxv16i64(<vscale x 16 x i64>)
; Declarations used by the nxv16i8, nxv8i16 and nxv4i32 input tests in this
; file (vector_deinterleave_nxv8i8_nxv16i8, vector_deinterleave_nxv4i16_nxv8i16
; and vector_deinterleave_nxv2i32_nxv4i32).
272 declare {<vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.experimental.vector.deinterleave2.nxv16i8(<vscale x 16 x i8>)
273 declare {<vscale x 4 x i16>, <vscale x 4 x i16>} @llvm.experimental.vector.deinterleave2.nxv8i16(<vscale x 8 x i16>)
274 declare {<vscale x 2 x i32>, <vscale x 2 x i32>} @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32>)