1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs < %s | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
7 ; VECTOR_SPLICE (index)
; Index 0 is the degenerate splice: the result starts at the first element of
; %a, so no cross-operand data movement is needed.
10 define <vscale x 16 x i8> @splice_nxv16i8_zero_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
11 ; CHECK-LABEL: splice_nxv16i8_zero_idx:
14 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
15 ret <vscale x 16 x i8> %res
; Positive in-range index lowers to a single EXT; byte offset = index * 1-byte
; element size = 1.
18 define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
19 ; CHECK-LABEL: splice_nxv16i8_first_idx:
21 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #1
23 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
24 ret <vscale x 16 x i8> %res
; vscale_range(16,16) fixes the register at 256 bytes, so index 255 is known
; in range and maps to the maximum EXT byte offset 255.
27 define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
28 ; CHECK-LABEL: splice_nxv16i8_last_idx:
30 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #255
32 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 255)
33 ret <vscale x 16 x i8> %res
; Index 1 on 2-byte elements: EXT byte offset = 1 * 2 = 2.
36 define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
37 ; CHECK-LABEL: splice_nxv8i16_first_idx:
39 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #2
41 %res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 1)
42 ret <vscale x 8 x i16> %res
; Index 1 on 4-byte elements: EXT byte offset = 1 * 4 = 4.
45 define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
46 ; CHECK-LABEL: splice_nxv4i32_first_idx:
48 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #4
50 %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
51 ret <vscale x 4 x i32> %res
; Fixed 256-byte vector (vscale 16): index 63 * 4-byte elements = EXT byte
; offset 252, the last i32 lane.
54 define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(16,16) #0 {
55 ; CHECK-LABEL: splice_nxv4i32_last_idx:
57 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252
59 %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 63)
60 ret <vscale x 4 x i32> %res
; Index 1 on 8-byte elements: EXT byte offset = 1 * 8 = 8.
63 define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
64 ; CHECK-LABEL: splice_nxv2i64_first_idx:
66 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
68 %res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
69 ret <vscale x 2 x i64> %res
; Fixed 256-byte vector: index 31 * 8-byte elements = EXT byte offset 248,
; the last i64 lane.
72 define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) vscale_range(16,16) #0 {
73 ; CHECK-LABEL: splice_nxv2i64_last_idx:
75 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248
77 %res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 31)
78 ret <vscale x 2 x i64> %res
; Negative index -1 (take the trailing element of %a): a one-element mask is
; built with ptrue vl1, reversed to select the *last* lane, then SPLICE merges
; the operands. The unpacked nxv2f16 is handled in 64-bit (.d) containers.
81 define <vscale x 2 x half> @splice_nxv2f16_neg_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
82 ; CHECK-LABEL: splice_nxv2f16_neg_idx:
84 ; CHECK-NEXT: ptrue p0.d, vl1
85 ; CHECK-NEXT: rev p0.d, p0.d
86 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
88 %res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -1)
89 ret <vscale x 2 x half> %res
; Negative index -2: same rev-mask + splice pattern with a two-lane (vl2)
; trailing mask.
92 define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
93 ; CHECK-LABEL: splice_nxv2f16_neg2_idx:
95 ; CHECK-NEXT: ptrue p0.d, vl2
96 ; CHECK-NEXT: rev p0.d, p0.d
97 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
99 %res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -2)
100 ret <vscale x 2 x half> %res
; Index 1 on unpacked nxv2f16 (64-bit containers): EXT byte offset = 1 * 8 = 8.
103 define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
104 ; CHECK-LABEL: splice_nxv2f16_first_idx:
106 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
108 %res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 1)
109 ret <vscale x 2 x half> %res
; Fixed 256-byte vector: index 31 in 8-byte containers = EXT byte offset 248.
112 define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) vscale_range(16,16) #0 {
113 ; CHECK-LABEL: splice_nxv2f16_last_idx:
115 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248
117 %res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 31)
118 ret <vscale x 2 x half> %res
; Negative index -1 on unpacked nxv4f16 (32-bit .s containers): rev'd vl1 mask
; selects the trailing lane for SPLICE.
121 define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
122 ; CHECK-LABEL: splice_nxv4f16_neg_idx:
124 ; CHECK-NEXT: ptrue p0.s, vl1
125 ; CHECK-NEXT: rev p0.s, p0.s
126 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
128 %res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -1)
129 ret <vscale x 4 x half> %res
; Negative index -3: trailing three-lane mask (ptrue vl3 + rev) for SPLICE.
132 define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
133 ; CHECK-LABEL: splice_nxv4f16_neg3_idx:
135 ; CHECK-NEXT: ptrue p0.s, vl3
136 ; CHECK-NEXT: rev p0.s, p0.s
137 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
139 %res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -3)
140 ret <vscale x 4 x half> %res
; Index 1 on unpacked nxv4f16 (32-bit containers): EXT byte offset = 1 * 4 = 4.
143 define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
144 ; CHECK-LABEL: splice_nxv4f16_first_idx:
146 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #4
148 %res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 1)
149 ret <vscale x 4 x half> %res
; Fixed 256-byte vector: index 63 in 4-byte containers = EXT byte offset 252.
152 define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) vscale_range(16,16) #0 {
153 ; CHECK-LABEL: splice_nxv4f16_last_idx:
155 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252
157 %res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 63)
158 ret <vscale x 4 x half> %res
; Index 1 on packed 2-byte half elements: EXT byte offset = 1 * 2 = 2.
161 define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
162 ; CHECK-LABEL: splice_nxv8f16_first_idx:
164 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #2
166 %res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 1)
167 ret <vscale x 8 x half> %res
; Fixed 256-byte vector: index 127 * 2-byte elements = EXT byte offset 254,
; the last half lane.
170 define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) vscale_range(16,16) #0 {
171 ; CHECK-LABEL: splice_nxv8f16_last_idx:
173 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #254
175 %res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 127)
176 ret <vscale x 8 x half> %res
; Negative index -1 on unpacked nxv2f32 (64-bit .d containers): rev'd vl1 mask
; selects the trailing lane for SPLICE.
179 define <vscale x 2 x float> @splice_nxv2f32_neg_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
180 ; CHECK-LABEL: splice_nxv2f32_neg_idx:
182 ; CHECK-NEXT: ptrue p0.d, vl1
183 ; CHECK-NEXT: rev p0.d, p0.d
184 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
186 %res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -1)
187 ret <vscale x 2 x float> %res
; Negative index -2: trailing two-lane mask (ptrue vl2 + rev) for SPLICE.
190 define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
191 ; CHECK-LABEL: splice_nxv2f32_neg2_idx:
193 ; CHECK-NEXT: ptrue p0.d, vl2
194 ; CHECK-NEXT: rev p0.d, p0.d
195 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
197 %res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -2)
198 ret <vscale x 2 x float> %res
; Index 1 on unpacked nxv2f32 (64-bit containers): EXT byte offset = 1 * 8 = 8.
201 define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
202 ; CHECK-LABEL: splice_nxv2f32_first_idx:
204 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
206 %res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 1)
207 ret <vscale x 2 x float> %res
; Fixed 256-byte vector: index 31 in 8-byte containers = EXT byte offset 248.
210 define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) vscale_range(16,16) #0 {
211 ; CHECK-LABEL: splice_nxv2f32_last_idx:
213 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248
215 %res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 31)
216 ret <vscale x 2 x float> %res
; Index 1 on 4-byte float elements: EXT byte offset = 1 * 4 = 4.
219 define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
220 ; CHECK-LABEL: splice_nxv4f32_first_idx:
222 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #4
224 %res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 1)
225 ret <vscale x 4 x float> %res
; Fixed 256-byte vector: index 63 * 4-byte elements = EXT byte offset 252.
228 define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(16,16) #0 {
229 ; CHECK-LABEL: splice_nxv4f32_last_idx:
231 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #252
233 %res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 63)
234 ret <vscale x 4 x float> %res
; Index 1 on 8-byte double elements: EXT byte offset = 1 * 8 = 8.
237 define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
238 ; CHECK-LABEL: splice_nxv2f64_first_idx:
240 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
242 %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 1)
243 ret <vscale x 2 x double> %res
; Fixed 256-byte vector: index 31 * 8-byte elements = EXT byte offset 248.
246 define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(16,16) #0 {
247 ; CHECK-LABEL: splice_nxv2f64_last_idx:
249 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #248
251 %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 31)
252 ret <vscale x 2 x double> %res
255 ; Ensure predicate based splice is promoted to use ZPRs.
; Predicate splice at index 1: both i1 inputs are materialised as 0/1 vectors
; (mov ?/z, #1), spliced with EXT (offset 8 = 1 * 8-byte container), then
; converted back to a predicate via AND #1 + CMPNE.
256 define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
257 ; CHECK-LABEL: splice_nxv2i1_idx:
259 ; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
260 ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
261 ; CHECK-NEXT: ptrue p2.d
262 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
263 ; CHECK-NEXT: and z1.d, z1.d, #0x1
264 ; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0
266 %res = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 1)
267 ret <vscale x 2 x i1> %res
270 ; Ensure predicate based splice is promoted to use ZPRs.
; Predicate splice at index 2 via ZPRs: EXT byte offset 8 = 2 * 4-byte (.s)
; container; AND #1 + CMPNE rebuilds the predicate.
271 define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
272 ; CHECK-LABEL: splice_nxv4i1_idx:
274 ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
275 ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
276 ; CHECK-NEXT: ptrue p2.s
277 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
278 ; CHECK-NEXT: and z1.s, z1.s, #0x1
279 ; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0
281 %res = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 2)
282 ret <vscale x 4 x i1> %res
285 ; Ensure predicate based splice is promoted to use ZPRs.
; Predicate splice at index 4 via ZPRs: EXT byte offset 8 = 4 * 2-byte (.h)
; container; AND #1 + CMPNE rebuilds the predicate.
286 define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
287 ; CHECK-LABEL: splice_nxv8i1_idx:
289 ; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
290 ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
291 ; CHECK-NEXT: ptrue p2.h
292 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
293 ; CHECK-NEXT: and z1.h, z1.h, #0x1
294 ; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0
296 %res = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 4)
297 ret <vscale x 8 x i1> %res
300 ; Ensure predicate based splice is promoted to use ZPRs.
; Predicate splice at index 8 via ZPRs: EXT byte offset 8 = 8 * 1-byte (.b)
; element; AND #1 + CMPNE rebuilds the predicate.
301 define <vscale x 16 x i1> @splice_nxv16i1_idx(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
302 ; CHECK-LABEL: splice_nxv16i1_idx:
304 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
305 ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
306 ; CHECK-NEXT: ptrue p2.b
307 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
308 ; CHECK-NEXT: and z1.b, z1.b, #0x1
309 ; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0
311 %res = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 8)
312 ret <vscale x 16 x i1> %res
315 ; Verify promote type legalisation works as expected.
; Promote-type legalisation: nxv2i8 is widened to 64-bit containers, so
; index 1 maps to EXT byte offset 8.
316 define <vscale x 2 x i8> @splice_nxv2i8_idx(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
317 ; CHECK-LABEL: splice_nxv2i8_idx:
319 ; CHECK-NEXT: ext z0.b, z0.b, z1.b, #8
321 %res = call <vscale x 2 x i8> @llvm.experimental.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 1)
322 ret <vscale x 2 x i8> %res
325 ; Verify splitvec type legalisation works as expected.
; Splitvec legalisation: the four input halves are stored contiguously on the
; stack and the result is reloaded starting 8 bytes (index 2 * 4 bytes) past
; the base, done with an x8 pointer OR'd with 0x8.
326 define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) #0 {
327 ; CHECK-LABEL: splice_nxv8i32_idx:
329 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
330 ; CHECK-NEXT: addvl sp, sp, #-4
331 ; CHECK-NEXT: ptrue p0.s
332 ; CHECK-NEXT: mov x8, sp
333 ; CHECK-NEXT: orr x8, x8, #0x8
334 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
335 ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
336 ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
337 ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
338 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
339 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
340 ; CHECK-NEXT: addvl sp, sp, #4
341 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
343 %res = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, i32 2)
344 ret <vscale x 8 x i32> %res
347 ; Verify splitvec type legalisation works as expected.
; Splitvec legalisation with a possibly out-of-range index: with
; vscale_range(2,16) the element count is unknown, so index 16 is clamped
; (cmp/csel against the runtime count) before both operands are spilled to the
; stack and the result reloaded from base + clamped_index * 4.
348 define <vscale x 16 x float> @splice_nxv16f32_16(<vscale x 16 x float> %a, <vscale x 16 x float> %b) vscale_range(2,16) #0 {
349 ; CHECK-LABEL: splice_nxv16f32_16:
351 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
352 ; CHECK-NEXT: addvl sp, sp, #-8
353 ; CHECK-NEXT: ptrue p0.s
354 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
355 ; CHECK-NEXT: mov w9, #16 // =0x10
356 ; CHECK-NEXT: addvl x8, x8, #1
357 ; CHECK-NEXT: cmp x8, #16
358 ; CHECK-NEXT: csel x8, x8, x9, lo
359 ; CHECK-NEXT: mov x9, sp
360 ; CHECK-NEXT: add x10, x9, x8, lsl #2
361 ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
362 ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
363 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
364 ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
365 ; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl]
366 ; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl]
367 ; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl]
368 ; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl]
369 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
370 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
371 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x10, #2, mul vl]
372 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x10, #3, mul vl]
373 ; CHECK-NEXT: addvl sp, sp, #8
374 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
376 %res = call <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b, i32 16)
377 ret <vscale x 16 x float> %res
381 ; VECTOR_SPLICE (trailing elements)
; Trailing splice of the last 16 bytes of %a: mask built with ptrue vl16 + rev
; (valid because 16 bytes fits in any SVE register), then SPLICE.
384 define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
385 ; CHECK-LABEL: splice_nxv16i8:
387 ; CHECK-NEXT: ptrue p0.b, vl16
388 ; CHECK-NEXT: rev p0.b, p0.b
389 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
391 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -16)
392 ret <vscale x 16 x i8> %res
; Trailing 32 elements: vscale_range(2,16) guarantees at least 32 bytes, so
; the ptrue vl32 + rev + splice sequence is in range.
395 define <vscale x 16 x i8> @splice_nxv16i8_neg32(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(2,16) #0 {
396 ; CHECK-LABEL: splice_nxv16i8_neg32:
398 ; CHECK-NEXT: ptrue p0.b, vl32
399 ; CHECK-NEXT: rev p0.b, p0.b
400 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
402 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -32)
403 ret <vscale x 16 x i8> %res
; Trailing 64 elements: minimum vscale 4 guarantees 64 bytes, so vl64 is safe.
406 define <vscale x 16 x i8> @splice_nxv16i8_neg64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(4,16) #0 {
407 ; CHECK-LABEL: splice_nxv16i8_neg64:
409 ; CHECK-NEXT: ptrue p0.b, vl64
410 ; CHECK-NEXT: rev p0.b, p0.b
411 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
413 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -64)
414 ret <vscale x 16 x i8> %res
; Trailing 128 elements: minimum vscale 8 guarantees 128 bytes, so vl128 is safe.
417 define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(8,16) #0 {
418 ; CHECK-LABEL: splice_nxv16i8_neg128:
420 ; CHECK-NEXT: ptrue p0.b, vl128
421 ; CHECK-NEXT: rev p0.b, p0.b
422 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
424 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -128)
425 ret <vscale x 16 x i8> %res
; Trailing 256 elements: vscale fixed at 16 means exactly 256 bytes, so the
; vl256 mask covers the whole register.
428 define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
429 ; CHECK-LABEL: splice_nxv16i8_neg256:
431 ; CHECK-NEXT: ptrue p0.b, vl256
432 ; CHECK-NEXT: rev p0.b, p0.b
433 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
435 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -256)
436 ret <vscale x 16 x i8> %res
; Trailing single element: rev'd vl1 mask selects the last byte lane for SPLICE.
439 define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
440 ; CHECK-LABEL: splice_nxv16i8_1:
442 ; CHECK-NEXT: ptrue p0.b, vl1
443 ; CHECK-NEXT: rev p0.b, p0.b
444 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
446 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -1)
447 ret <vscale x 16 x i8> %res
; Trailing 17 elements may exceed the minimum register size (16 bytes at
; vscale 2), so the count is clamped to the runtime vector length (rdvl +
; cmp/csel) and the splice goes through a stack spill/reload.
450 define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(2,16) #0 {
451 ; CHECK-LABEL: splice_nxv16i8_neg17:
453 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
454 ; CHECK-NEXT: addvl sp, sp, #-2
455 ; CHECK-NEXT: ptrue p0.b
456 ; CHECK-NEXT: rdvl x8, #1
457 ; CHECK-NEXT: mov w9, #17 // =0x11
458 ; CHECK-NEXT: mov x10, sp
459 ; CHECK-NEXT: cmp x8, #17
460 ; CHECK-NEXT: addvl x10, x10, #1
461 ; CHECK-NEXT: csel x8, x8, x9, lo
462 ; CHECK-NEXT: sub x8, x10, x8
463 ; CHECK-NEXT: st1b { z0.b }, p0, [sp]
464 ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
465 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8]
466 ; CHECK-NEXT: addvl sp, sp, #2
467 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
469 %res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -17)
470 ret <vscale x 16 x i8> %res
; Trailing 8 halfword elements: rev'd vl8 mask + SPLICE (always in range).
473 define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
474 ; CHECK-LABEL: splice_nxv8i16:
476 ; CHECK-NEXT: ptrue p0.h, vl8
477 ; CHECK-NEXT: rev p0.h, p0.h
478 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
480 %res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -8)
481 ret <vscale x 8 x i16> %res
; Trailing single halfword: rev'd vl1 mask selects the last .h lane for SPLICE.
484 define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
485 ; CHECK-LABEL: splice_nxv8i16_1:
487 ; CHECK-NEXT: ptrue p0.h, vl1
488 ; CHECK-NEXT: rev p0.h, p0.h
489 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
491 %res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -1)
492 ret <vscale x 8 x i16> %res
; Trailing 9 halfwords (18 bytes) may exceed the 16-byte minimum register, so
; the byte count is clamped against rdvl and the splice is done via the stack.
495 define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) vscale_range(2,16) #0 {
496 ; CHECK-LABEL: splice_nxv8i16_neg9:
498 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
499 ; CHECK-NEXT: addvl sp, sp, #-2
500 ; CHECK-NEXT: ptrue p0.h
501 ; CHECK-NEXT: rdvl x8, #1
502 ; CHECK-NEXT: mov w9, #18 // =0x12
503 ; CHECK-NEXT: mov x10, sp
504 ; CHECK-NEXT: cmp x8, #18
505 ; CHECK-NEXT: addvl x10, x10, #1
506 ; CHECK-NEXT: csel x8, x8, x9, lo
507 ; CHECK-NEXT: sub x8, x10, x8
508 ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
509 ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
510 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
511 ; CHECK-NEXT: addvl sp, sp, #2
512 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
514 %res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -9)
515 ret <vscale x 8 x i16> %res
; Trailing 4 word elements: rev'd vl4 mask + SPLICE (always in range).
518 define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
519 ; CHECK-LABEL: splice_nxv4i32:
521 ; CHECK-NEXT: ptrue p0.s, vl4
522 ; CHECK-NEXT: rev p0.s, p0.s
523 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
525 %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -4)
526 ret <vscale x 4 x i32> %res
; Trailing single word: rev'd vl1 mask selects the last .s lane for SPLICE.
529 define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
530 ; CHECK-LABEL: splice_nxv4i32_1:
532 ; CHECK-NEXT: ptrue p0.s, vl1
533 ; CHECK-NEXT: rev p0.s, p0.s
534 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
536 %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -1)
537 ret <vscale x 4 x i32> %res
; Trailing 5 words: vscale_range(2,16) guarantees at least 8 .s elements, so
; the vl5 mask is in range and no clamping/stack path is needed.
540 define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(2,16) #0 {
541 ; CHECK-LABEL: splice_nxv4i32_neg5:
543 ; CHECK-NEXT: ptrue p0.s, vl5
544 ; CHECK-NEXT: rev p0.s, p0.s
545 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
547 %res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -5)
548 ret <vscale x 4 x i32> %res
; Trailing 2 doubleword elements: rev'd vl2 mask + SPLICE (always in range).
551 define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
552 ; CHECK-LABEL: splice_nxv2i64:
554 ; CHECK-NEXT: ptrue p0.d, vl2
555 ; CHECK-NEXT: rev p0.d, p0.d
556 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
558 %res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -2)
559 ret <vscale x 2 x i64> %res
; Trailing single doubleword: rev'd vl1 mask selects the last .d lane for SPLICE.
562 define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
563 ; CHECK-LABEL: splice_nxv2i64_1:
565 ; CHECK-NEXT: ptrue p0.d, vl1
566 ; CHECK-NEXT: rev p0.d, p0.d
567 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
569 %res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -1)
570 ret <vscale x 2 x i64> %res
; Trailing 3 doublewords: minimum vscale 2 guarantees 4 .d elements, so the
; vl3 mask is in range.
573 define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) vscale_range(2,16) #0 {
574 ; CHECK-LABEL: splice_nxv2i64_neg3:
576 ; CHECK-NEXT: ptrue p0.d, vl3
577 ; CHECK-NEXT: rev p0.d, p0.d
578 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
580 %res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -3)
581 ret <vscale x 2 x i64> %res
; Trailing 8 half elements: rev'd vl8 mask + SPLICE (always in range).
584 define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
585 ; CHECK-LABEL: splice_nxv8f16:
587 ; CHECK-NEXT: ptrue p0.h, vl8
588 ; CHECK-NEXT: rev p0.h, p0.h
589 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
591 %res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -8)
592 ret <vscale x 8 x half> %res
; Trailing single half: rev'd vl1 mask selects the last .h lane for SPLICE.
595 define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
596 ; CHECK-LABEL: splice_nxv8f16_1:
598 ; CHECK-NEXT: ptrue p0.h, vl1
599 ; CHECK-NEXT: rev p0.h, p0.h
600 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
602 %res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -1)
603 ret <vscale x 8 x half> %res
; Trailing 9 halves (18 bytes) may exceed the 16-byte minimum register, so the
; byte count is clamped against rdvl and the splice is done via the stack.
606 define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) vscale_range(2,16) #0 {
607 ; CHECK-LABEL: splice_nxv8f16_neg9:
609 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
610 ; CHECK-NEXT: addvl sp, sp, #-2
611 ; CHECK-NEXT: ptrue p0.h
612 ; CHECK-NEXT: rdvl x8, #1
613 ; CHECK-NEXT: mov w9, #18 // =0x12
614 ; CHECK-NEXT: mov x10, sp
615 ; CHECK-NEXT: cmp x8, #18
616 ; CHECK-NEXT: addvl x10, x10, #1
617 ; CHECK-NEXT: csel x8, x8, x9, lo
618 ; CHECK-NEXT: sub x8, x10, x8
619 ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
620 ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
621 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
622 ; CHECK-NEXT: addvl sp, sp, #2
623 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
625 %res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -9)
626 ret <vscale x 8 x half> %res
; Trailing 4 float elements: rev'd vl4 mask + SPLICE (always in range).
629 define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
630 ; CHECK-LABEL: splice_nxv4f32:
632 ; CHECK-NEXT: ptrue p0.s, vl4
633 ; CHECK-NEXT: rev p0.s, p0.s
634 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
636 %res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -4)
637 ret <vscale x 4 x float> %res
; Trailing single float: rev'd vl1 mask selects the last .s lane for SPLICE.
640 define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
641 ; CHECK-LABEL: splice_nxv4f32_1:
643 ; CHECK-NEXT: ptrue p0.s, vl1
644 ; CHECK-NEXT: rev p0.s, p0.s
645 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
647 %res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -1)
648 ret <vscale x 4 x float> %res
; Trailing 5 floats: minimum vscale 2 guarantees 8 .s elements, so the vl5
; mask is in range.
651 define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(2,16) #0 {
652 ; CHECK-LABEL: splice_nxv4f32_neg5:
654 ; CHECK-NEXT: ptrue p0.s, vl5
655 ; CHECK-NEXT: rev p0.s, p0.s
656 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
658 %res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -5)
659 ret <vscale x 4 x float> %res
; Trailing 2 double elements: rev'd vl2 mask + SPLICE (always in range).
662 define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
663 ; CHECK-LABEL: splice_nxv2f64:
665 ; CHECK-NEXT: ptrue p0.d, vl2
666 ; CHECK-NEXT: rev p0.d, p0.d
667 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
669 %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -2)
670 ret <vscale x 2 x double> %res
; Trailing single double: rev'd vl1 mask selects the last .d lane for SPLICE.
673 define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
674 ; CHECK-LABEL: splice_nxv2f64_1:
676 ; CHECK-NEXT: ptrue p0.d, vl1
677 ; CHECK-NEXT: rev p0.d, p0.d
678 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
680 %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -1)
681 ret <vscale x 2 x double> %res
; Trailing 3 doubles: minimum vscale 2 guarantees 4 .d elements, so the vl3
; mask is in range.
684 define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(2,16) #0 {
685 ; CHECK-LABEL: splice_nxv2f64_neg3:
687 ; CHECK-NEXT: ptrue p0.d, vl3
688 ; CHECK-NEXT: rev p0.d, p0.d
689 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
691 %res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -3)
692 ret <vscale x 2 x double> %res
695 ; Ensure predicate based splice is promoted to use ZPRs.
; Trailing predicate splice via ZPRs: the i1 inputs become 0/1 vectors, the
; trailing-one mask (ptrue vl1 + rev) drives a vector SPLICE, and AND #1 +
; CMPNE converts the result back to a predicate.
696 define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
697 ; CHECK-LABEL: splice_nxv2i1:
699 ; CHECK-NEXT: ptrue p2.d, vl1
700 ; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
701 ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
702 ; CHECK-NEXT: ptrue p0.d
703 ; CHECK-NEXT: rev p2.d, p2.d
704 ; CHECK-NEXT: splice z1.d, p2, z1.d, z0.d
705 ; CHECK-NEXT: and z1.d, z1.d, #0x1
706 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
708 %res = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 -1)
709 ret <vscale x 2 x i1> %res
712 ; Ensure predicate based splice is promoted to use ZPRs.
; Same ZPR promotion as splice_nxv2i1, at .s granularity.
713 define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
714 ; CHECK-LABEL: splice_nxv4i1:
716 ; CHECK-NEXT: ptrue p2.s, vl1
717 ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
718 ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
719 ; CHECK-NEXT: ptrue p0.s
720 ; CHECK-NEXT: rev p2.s, p2.s
721 ; CHECK-NEXT: splice z1.s, p2, z1.s, z0.s
722 ; CHECK-NEXT: and z1.s, z1.s, #0x1
723 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
725 %res = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 -1)
726 ret <vscale x 4 x i1> %res
729 ; Ensure predicate based splice is promoted to use ZPRs.
; Same ZPR promotion as splice_nxv2i1, at .h granularity.
730 define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
731 ; CHECK-LABEL: splice_nxv8i1:
733 ; CHECK-NEXT: ptrue p2.h, vl1
734 ; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
735 ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
736 ; CHECK-NEXT: ptrue p0.h
737 ; CHECK-NEXT: rev p2.h, p2.h
738 ; CHECK-NEXT: splice z1.h, p2, z1.h, z0.h
739 ; CHECK-NEXT: and z1.h, z1.h, #0x1
740 ; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
742 %res = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 -1)
743 ret <vscale x 8 x i1> %res
746 ; Ensure predicate based splice is promoted to use ZPRs.
; Same ZPR promotion as splice_nxv2i1, at .b granularity.
747 define <vscale x 16 x i1> @splice_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
748 ; CHECK-LABEL: splice_nxv16i1:
750 ; CHECK-NEXT: ptrue p2.b, vl1
751 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
752 ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
753 ; CHECK-NEXT: ptrue p0.b
754 ; CHECK-NEXT: rev p2.b, p2.b
755 ; CHECK-NEXT: splice z1.b, p2, z1.b, z0.b
756 ; CHECK-NEXT: and z1.b, z1.b, #0x1
757 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
759 %res = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 -1)
760 ret <vscale x 16 x i1> %res
763 ; Verify promote type legalisation works as expected.
; Promote-type legalisation: nxv2i8 is widened to .d containers, then the
; trailing-2 splice uses the usual rev'd vl2 mask + SPLICE.
764 define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
765 ; CHECK-LABEL: splice_nxv2i8:
767 ; CHECK-NEXT: ptrue p0.d, vl2
768 ; CHECK-NEXT: rev p0.d, p0.d
769 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
771 %res = call <vscale x 2 x i8> @llvm.experimental.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 -2)
772 ret <vscale x 2 x i8> %res
775 ; Verify splitvec type legalisation works as expected.
; Splitvec legalisation for a trailing splice: all four halves are spilled to
; the stack and the result is reloaded 32 bytes (8 i32 elements) before the
; end of the first operand's storage.
776 define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) #0 {
777 ; CHECK-LABEL: splice_nxv8i32:
779 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
780 ; CHECK-NEXT: addvl sp, sp, #-4
781 ; CHECK-NEXT: ptrue p0.s
782 ; CHECK-NEXT: mov x8, sp
783 ; CHECK-NEXT: mov x9, #-8 // =0xfffffffffffffff8
784 ; CHECK-NEXT: addvl x8, x8, #2
785 ; CHECK-NEXT: sub x10, x8, #32
786 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
787 ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
788 ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
789 ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
790 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
791 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
792 ; CHECK-NEXT: addvl sp, sp, #4
793 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
795 %res = call <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, i32 -8)
796 ret <vscale x 8 x i32> %res
799 ; Verify splitvec type legalisation works as expected.
; Splitvec legalisation with a trailing count (17 floats = 68 bytes) that may
; exceed the minimum register group size: the byte count is clamped against
; rdvl #4, all eight halves are spilled, and the result is reloaded from
; end_of_first_operand - clamped_bytes.
800 define <vscale x 16 x float> @splice_nxv16f32_neg17(<vscale x 16 x float> %a, <vscale x 16 x float> %b) vscale_range(2,16) #0 {
801 ; CHECK-LABEL: splice_nxv16f32_neg17:
803 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
804 ; CHECK-NEXT: addvl sp, sp, #-8
805 ; CHECK-NEXT: ptrue p0.s
806 ; CHECK-NEXT: rdvl x8, #4
807 ; CHECK-NEXT: mov w9, #68 // =0x44
808 ; CHECK-NEXT: mov x10, sp
809 ; CHECK-NEXT: cmp x8, #68
810 ; CHECK-NEXT: csel x8, x8, x9, lo
811 ; CHECK-NEXT: addvl x9, x10, #4
812 ; CHECK-NEXT: sub x8, x9, x8
813 ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
814 ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
815 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
816 ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
817 ; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl]
818 ; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl]
819 ; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl]
820 ; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl]
821 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
822 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
823 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x8, #2, mul vl]
824 ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x8, #3, mul vl]
825 ; CHECK-NEXT: addvl sp, sp, #8
826 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
828 %res = call <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b, i32 -17)
829 ret <vscale x 16 x float> %res
832 declare <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
833 declare <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
834 declare <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
835 declare <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
836 declare <vscale x 2 x i8> @llvm.experimental.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
837 declare <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
838 declare <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
839 declare <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
840 declare <vscale x 8 x i32> @llvm.experimental.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
841 declare <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
842 declare <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
843 declare <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
844 declare <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
845 declare <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
846 declare <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
847 declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
848 declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
850 attributes #0 = { nounwind "target-features"="+sve" }