; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; VECTOR_SPLICE (index)
;
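
; A positive immediate index takes elements from the concatenation <%a, %b>
; starting at %a[idx]. When the index is necessarily in range (trivially, or
; via a vscale_range bound) it lowers to a single EXT whose immediate is the
; byte offset idx * sizeof(element), as the tests below show.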

define <vscale x 16 x i8> @splice_nxv16i8_zero_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_zero_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv16i8_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #255
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 255)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #2
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 1)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv4i32_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 63)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2i64_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 31)
  ret <vscale x 2 x i64> %res
}
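
; In the last_idx tests above, vscale_range(16,16) fixes the register size at
; 16 * 16 = 256 bytes, so the scaled offsets 255 * 1 = #255, 63 * 4 = #252 and
; 31 * 8 = #248 all fit EXT's 8-bit byte-offset immediate.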

define <vscale x 2 x half> @splice_nxv2f16_neg_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -1)
  ret <vscale x 2 x half> %res
}
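
; Small negative indices lower to a predicated SPLICE: PTRUE vlN + REV builds
; a predicate whose last N lanes are active, and SPLICE then takes the final N
; elements of the first operand followed by leading elements of the second.
; The remaining negative-index tests in this block use the same pattern.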

define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_neg2_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -2)
  ret <vscale x 2 x half> %res
}

define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 1)
  ret <vscale x 2 x half> %res
}

define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2f16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 31)
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -1)
  ret <vscale x 4 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_neg3_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl3
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -3)
  ret <vscale x 4 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 1)
  ret <vscale x 4 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv4f16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 63)
  ret <vscale x 4 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #2
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 1)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv8f16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 127)
  ret <vscale x 8 x half> %res
}

define <vscale x 2 x float> @splice_nxv2f32_neg_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -1)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_neg2_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -2)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 1)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2f32_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 31)
  ret <vscale x 2 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 1)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv4f32_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 63)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 1)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2f64_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 31)
  ret <vscale x 2 x double> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv2i1_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 1)
  ret <vscale x 2 x i1> %res
}
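
; Predicates have no direct splice, so each input is expanded to a 0/1 integer
; vector (mov ../z, #1), spliced as data with EXT, then narrowed back to a
; predicate via AND #1 and CMPNE. The nxv4i1/nxv8i1/nxv16i1 tests below repeat
; the sequence at their respective element widths.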

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv4i1_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    and z1.s, z1.s, #0x1
; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 2)
  ret <vscale x 4 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv8i1_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.h, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    and z1.h, z1.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 4)
  ret <vscale x 8 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 16 x i1> @splice_nxv16i1_idx(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv16i1_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    and z1.b, z1.b, #0x1
; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 8)
  ret <vscale x 16 x i1> %res
}

; Verify promote type legalisation works as expected.
define <vscale x 2 x i8> @splice_nxv2i8_idx(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv2i8_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 1)
  ret <vscale x 2 x i8> %res
}
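
; nxv2i8 is promoted so each i8 value occupies a 64-bit lane, which is why
; element index 1 scales to byte offset 1 * 8 = #8 in the EXT above.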

; Verify splitvec type legalisation works as expected.
define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv8i32_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    orr x8, x8, #0x8
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, i32 2)
  ret <vscale x 8 x i32> %res
}
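
; Types split across multiple registers are spliced through the stack: both
; inputs are stored to one contiguous slot and the result is reloaded at the
; splice offset, here 2 elements * 4 bytes = 8 (the orr acts as an add since
; the base is 16-byte aligned).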

; Verify splitvec type legalisation works as expected.
define <vscale x 16 x float> @splice_nxv16f32_16(<vscale x 16 x float> %a, <vscale x 16 x float> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv16f32_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-8
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov w9, #16 // =0x10
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sub x8, x8, #1
; CHECK-NEXT:    cmp x8, #16
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    csel x8, x8, x9, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    add x10, x9, x8, lsl #2
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    st1w { z7.s }, p0, [sp, #7, mul vl]
; CHECK-NEXT:    st1w { z4.s }, p0, [sp, #4, mul vl]
; CHECK-NEXT:    st1w { z5.s }, p0, [sp, #5, mul vl]
; CHECK-NEXT:    st1w { z6.s }, p0, [sp, #6, mul vl]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x10, #1, mul vl]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x10, #2, mul vl]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x10, #3, mul vl]
; CHECK-NEXT:    addvl sp, sp, #8
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b, i32 16)
  ret <vscale x 16 x float> %res
}
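
; In the stack lowering the index is first clamped (csel) to the last element
; of %a before being scaled by the element size into the reload address.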

;
; VECTOR_SPLICE (trailing elements)
;
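
; A negative index -N selects the last N elements of %a followed by leading
; elements of %b, i.e. the split point is counted back from the end of the
; runtime vector length. Counts that map onto an encodable PTRUE pattern
; appear below as PTRUE/REV/SPLICE; the others go through the stack.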

define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -16)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg32(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -32)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(4,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -64)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(8,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -128)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -256)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl1
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -1)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg17:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #17 // =0x11
; CHECK-NEXT:    cmp x8, #17
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    csel x9, x8, x9, lo
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -17)
  ret <vscale x 16 x i8> %res
}
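
; 17 is not an encodable PTRUE pattern (only vl1-vl8, vl16, vl32, vl64, vl128
; and vl256 exist), so this case is spliced through the stack, with the
; trailing byte count clamped (csel) to at most one full vector.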

define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -8)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl1
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -1)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv8i16_neg9:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov w9, #18 // =0x12
; CHECK-NEXT:    cmp x8, #18
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    csel x9, x8, x9, lo
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -9)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -4)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -1)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv4i32_neg5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl5
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -5)
  ret <vscale x 4 x i32> %res
}
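
; Contrast with the neg9 case above: vl5 is a valid PTRUE pattern, so -5 keeps
; the compact PTRUE/REV/SPLICE form, while -9 i16 elements (18 bytes) has no
; matching pattern and must go via the stack.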

define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -2)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -1)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv2i64_neg3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl3
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -3)
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -8)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl1
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -1)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv8f16_neg9:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov w9, #18 // =0x12
; CHECK-NEXT:    cmp x8, #18
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    csel x9, x8, x9, lo
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -9)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -4)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -1)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv4f32_neg5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl5
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -5)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -2)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -1)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv2f64_neg3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl3
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -3)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x bfloat> @splice_nxv2bf16_neg_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -1)
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 2 x bfloat> @splice_nxv2bf16_neg2_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_neg2_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -2)
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 2 x bfloat> @splice_nxv2bf16_first_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 1)
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 2 x bfloat> @splice_nxv2bf16_last_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2bf16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 31)
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 4 x bfloat> @splice_nxv4bf16_neg_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -1)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 4 x bfloat> @splice_nxv4bf16_neg3_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_neg3_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl3
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -3)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 4 x bfloat> @splice_nxv4bf16_first_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 1)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 4 x bfloat> @splice_nxv4bf16_last_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv4bf16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 63)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 8 x bfloat> @splice_nxv8bf16_first_idx(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv8bf16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #2
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 1)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x bfloat> @splice_nxv8bf16_last_idx(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv8bf16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 127)
  ret <vscale x 8 x bfloat> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.d, vl1
; CHECK-NEXT:    mov z0.d, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT:    rev p0.d, p2.d
; CHECK-NEXT:    splice z1.d, p0, z1.d, z0.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 -1)
  ret <vscale x 2 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.s, vl1
; CHECK-NEXT:    mov z0.s, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
; CHECK-NEXT:    rev p0.s, p2.s
; CHECK-NEXT:    splice z1.s, p0, z1.s, z0.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    and z1.s, z1.s, #0x1
; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 -1)
  ret <vscale x 4 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.h, vl1
; CHECK-NEXT:    mov z0.h, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.h, p0/z, #1 // =0x1
; CHECK-NEXT:    rev p0.h, p2.h
; CHECK-NEXT:    splice z1.h, p0, z1.h, z0.h
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    and z1.h, z1.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 -1)
  ret <vscale x 8 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 16 x i1> @splice_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.b, vl1
; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT:    rev p0.b, p2.b
; CHECK-NEXT:    splice z1.b, p0, z1.b, z0.b
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    and z1.b, z1.b, #0x1
; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 -1)
  ret <vscale x 16 x i1> %res
}

; Verify promote type legalisation works as expected.
define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 -2)
  ret <vscale x 2 x i8> %res
}

; Verify splitvec type legalisation works as expected.
define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    rdvl x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    add x8, x9, x8
; CHECK-NEXT:    mov x9, #-8 // =0xfffffffffffffff8
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    sub x10, x8, #32
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x10, #1, mul vl]
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, i32 -8)
  ret <vscale x 8 x i32> %res
}
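
; For the split type the reload address is formed from the end of %a's slot:
; x8 points rdvl #2 bytes (the size of %a) past sp, and the loads start
; 8 * 4 = 32 bytes before that, matching the -8 element splice.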

; Verify splitvec type legalisation works as expected.
define <vscale x 16 x float> @splice_nxv16f32_neg17(<vscale x 16 x float> %a, <vscale x 16 x float> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv16f32_neg17:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-8
; CHECK-NEXT:    rdvl x8, #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w9, #68 // =0x44
; CHECK-NEXT:    cmp x8, #68
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    csel x9, x8, x9, lo
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    st1w { z7.s }, p0, [sp, #7, mul vl]
; CHECK-NEXT:    st1w { z4.s }, p0, [sp, #4, mul vl]
; CHECK-NEXT:    st1w { z5.s }, p0, [sp, #5, mul vl]
; CHECK-NEXT:    st1w { z6.s }, p0, [sp, #6, mul vl]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x8, #3, mul vl]
; CHECK-NEXT:    addvl sp, sp, #8
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b, i32 -17)
  ret <vscale x 16 x float> %res
}
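
; As with nxv16i8_neg17, there is no PTRUE pattern for 17, so the trailing
; byte count 17 * 4 = 68 is clamped against the total size of %a (rdvl #4)
; and the four result registers are reloaded from the stack.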

declare <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)

declare <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
declare <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
declare <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
declare <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
declare <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

declare <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
declare <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
declare <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)

attributes #0 = { nounwind "target-features"="+sve" }