; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
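; Check that SVE vector tuple values are returned and passed in consecutive Z
; registers, with the elements shuffled down when they do not already start at z0.

;
; svint8x2_t
;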
define <vscale x 32 x i8> @ret_svint8x2_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
; CHECK-LABEL: ret_svint8x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
  ret <vscale x 32 x i8> %tuple
}

define void @call_svint8x2_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %dummy_z2, <vscale x 16 x i8> %z3) #0 {
; CHECK-LABEL: call_svint8x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint8x2_t
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z3)
  call void @callee_svint8x2_t(<vscale x 32 x i8> %tuple)
  ret void
}
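;
; svint16x2_t
;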
define <vscale x 16 x i16> @ret_svint16x2_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
; CHECK-LABEL: ret_svint16x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
  ret <vscale x 16 x i16> %tuple
}

define void @call_svint16x2_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %dummy_z2, <vscale x 8 x i16> %z3) #0 {
; CHECK-LABEL: call_svint16x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint16x2_t
  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z3)
  call void @callee_svint16x2_t(<vscale x 16 x i16> %tuple)
  ret void
}
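;
; svint32x2_t
;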
define <vscale x 8 x i32> @ret_svint32x2_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
; CHECK-LABEL: ret_svint32x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  ret <vscale x 8 x i32> %tuple
}

define void @call_svint32x2_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %dummy_z2, <vscale x 4 x i32> %z3) #0 {
; CHECK-LABEL: call_svint32x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint32x2_t
  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z3)
  call void @callee_svint32x2_t(<vscale x 8 x i32> %tuple)
  ret void
}
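;
; svint64x2_t
;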
define <vscale x 4 x i64> @ret_svint64x2_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
; CHECK-LABEL: ret_svint64x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
  ret <vscale x 4 x i64> %tuple
}

define void @call_svint64x2_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %dummy_z2, <vscale x 2 x i64> %z3) #0 {
; CHECK-LABEL: call_svint64x2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint64x2_t
  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z3)
  call void @callee_svint64x2_t(<vscale x 4 x i64> %tuple)
  ret void
}
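;
; svfloatx2_t
;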
define <vscale x 8 x float> @ret_svfloatx2_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
; CHECK-LABEL: ret_svfloatx2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
  ret <vscale x 8 x float> %tuple
}

define void @call_svfloatx2_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %dummy_z2, <vscale x 4 x float> %z3) #0 {
; CHECK-LABEL: call_svfloatx2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svfloatx2_t
  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z3)
  call void @callee_svfloatx2_t(<vscale x 8 x float> %tuple)
  ret void
}
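;
; svdoublex2_t
;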
define <vscale x 4 x double> @ret_svdoublex2_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
; CHECK-LABEL: ret_svdoublex2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
  ret <vscale x 4 x double> %tuple
}

define void @call_svdoublex2_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %dummy_z2, <vscale x 2 x double> %z3) #0 {
; CHECK-LABEL: call_svdoublex2_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svdoublex2_t
  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z3)
  call void @callee_svdoublex2_t(<vscale x 4 x double> %tuple)
  ret void
}
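;
; svint8x3_t
;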
define <vscale x 48 x i8> @ret_svint8x3_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
; CHECK-LABEL: ret_svint8x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
  ret <vscale x 48 x i8> %tuple
}

define void @call_svint8x3_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4) #0 {
; CHECK-LABEL: call_svint8x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint8x3_t
  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4)
  call void @callee_svint8x3_t(<vscale x 48 x i8> %tuple)
  ret void
}
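;
; svint16x3_t
;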
define <vscale x 24 x i16> @ret_svint16x3_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
; CHECK-LABEL: ret_svint16x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
  ret <vscale x 24 x i16> %tuple
}

define void @call_svint16x3_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4) #0 {
; CHECK-LABEL: call_svint16x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint16x3_t
  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4)
  call void @callee_svint16x3_t(<vscale x 24 x i16> %tuple)
  ret void
}
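;
; svint32x3_t
;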
define <vscale x 12 x i32> @ret_svint32x3_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
; CHECK-LABEL: ret_svint32x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  ret <vscale x 12 x i32> %tuple
}

define void @call_svint32x3_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4) #0 {
; CHECK-LABEL: call_svint32x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint32x3_t
  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4)
  call void @callee_svint32x3_t(<vscale x 12 x i32> %tuple)
  ret void
}
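;
; svint64x3_t
;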
define <vscale x 6 x i64> @ret_svint64x3_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
; CHECK-LABEL: ret_svint64x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
  ret <vscale x 6 x i64> %tuple
}

define void @call_svint64x3_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4) #0 {
; CHECK-LABEL: call_svint64x3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint64x3_t
  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4)
  call void @callee_svint64x3_t(<vscale x 6 x i64> %tuple)
  ret void
}
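;
; svfloatx3_t
;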
define <vscale x 12 x float> @ret_svfloatx3_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
; CHECK-LABEL: ret_svfloatx3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
  ret <vscale x 12 x float> %tuple
}

define void @call_svfloatx3_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4) #0 {
; CHECK-LABEL: call_svfloatx3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svfloatx3_t
  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4)
  call void @callee_svfloatx3_t(<vscale x 12 x float> %tuple)
  ret void
}
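;
; svdoublex3_t
;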
define <vscale x 6 x double> @ret_svdoublex3_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
; CHECK-LABEL: ret_svdoublex3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
  ret <vscale x 6 x double> %tuple
}

define void @call_svdoublex3_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4) #0 {
; CHECK-LABEL: call_svdoublex3_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svdoublex3_t
  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4)
  call void @callee_svdoublex3_t(<vscale x 6 x double> %tuple)
  ret void
}
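;
; svint8x4_t
;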
define <vscale x 64 x i8> @ret_svint8x4_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4) #0 {
; CHECK-LABEL: ret_svint8x4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4)
  ret <vscale x 64 x i8> %tuple
}

define void @call_svint8x4_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5) #0 {
; CHECK-LABEL: call_svint8x4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint8x4_t
  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5)
  call void @callee_svint8x4_t(<vscale x 64 x i8> %tuple)
  ret void
}
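;
; svint16x4_t
;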
define <vscale x 32 x i16> @ret_svint16x4_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4) #0 {
; CHECK-LABEL: ret_svint16x4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4)
  ret <vscale x 32 x i16> %tuple
}

define void @call_svint16x4_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5) #0 {
; CHECK-LABEL: call_svint16x4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint16x4_t
  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5)
  call void @callee_svint16x4_t(<vscale x 32 x i16> %tuple)
  ret void
}
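;
; svint32x4_t
;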
define <vscale x 16 x i32> @ret_svint32x4_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4) #0 {
; CHECK-LABEL: ret_svint32x4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4)
  ret <vscale x 16 x i32> %tuple
}

define void @call_svint32x4_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: call_svint32x4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint32x4_t
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5)
  call void @callee_svint32x4_t(<vscale x 16 x i32> %tuple)
  ret void
}
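;
; svint64x4_t
;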
define <vscale x 8 x i64> @ret_svint64x4_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4) #0 {
; CHECK-LABEL: ret_svint64x4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4)
  ret <vscale x 8 x i64> %tuple
}

define void @call_svint64x4_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5) #0 {
; CHECK-LABEL: call_svint64x4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint64x4_t
  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5)
  call void @callee_svint64x4_t(<vscale x 8 x i64> %tuple)
  ret void
}
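;
; svfloatx4_t
;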
define <vscale x 16 x float> @ret_svfloatx4_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4) #0 {
; CHECK-LABEL: ret_svfloatx4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4)
  ret <vscale x 16 x float> %tuple
}

define void @call_svfloatx4_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5) #0 {
; CHECK-LABEL: call_svfloatx4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svfloatx4_t
  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5)
  call void @callee_svfloatx4_t(<vscale x 16 x float> %tuple)
  ret void
}
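;
; svdoublex4_t
;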
define <vscale x 8 x double> @ret_svdoublex4_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4) #0 {
; CHECK-LABEL: ret_svdoublex4_t
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4)
  ret <vscale x 8 x double> %tuple
}

define void @call_svdoublex4_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5) #0 {
; CHECK-LABEL: call_svdoublex4_t
; CHECK: mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svdoublex4_t
  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5)
  call void @callee_svdoublex4_t(<vscale x 8 x double> %tuple)
  ret void
}
attributes #0 = { nounwind "target-features"="+sve" }
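; Prototypes of the external callees used above.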
declare void @callee_svint8x2_t(<vscale x 32 x i8>)
declare void @callee_svint16x2_t(<vscale x 16 x i16>)
declare void @callee_svint32x2_t(<vscale x 8 x i32>)
declare void @callee_svint64x2_t(<vscale x 4 x i64>)
declare void @callee_svfloatx2_t(<vscale x 8 x float>)
declare void @callee_svdoublex2_t(<vscale x 4 x double>)

declare void @callee_svint8x3_t(<vscale x 48 x i8>)
declare void @callee_svint16x3_t(<vscale x 24 x i16>)
declare void @callee_svint32x3_t(<vscale x 12 x i32>)
declare void @callee_svint64x3_t(<vscale x 6 x i64>)
declare void @callee_svfloatx3_t(<vscale x 12 x float>)
declare void @callee_svdoublex3_t(<vscale x 6 x double>)

declare void @callee_svint8x4_t(<vscale x 64 x i8>)
declare void @callee_svint16x4_t(<vscale x 32 x i16>)
declare void @callee_svint32x4_t(<vscale x 16 x i32>)
declare void @callee_svint64x4_t(<vscale x 8 x i64>)
declare void @callee_svfloatx4_t(<vscale x 16 x float>)
declare void @callee_svdoublex4_t(<vscale x 8 x double>)
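; Declarations of the SVE tuple-create intrinsics used to build the tuples.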
declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)