1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; Every function in this test is compiled for AArch64 Linux.
6 target triple = "aarch64-unknown-linux-gnu"
12 ; Don't use SVE for 64-bit vectors.
; Builds an <8 x i8> splat of %a (insertelement into lane 0 followed by an
; all-zero shuffle mask). The expected code is a single Advanced SIMD
; "dup v0.8b, w0" rather than any SVE instruction.
13 define <8 x i8> @splat_v8i8(i8 %a) vscale_range(2,0) #0 {
14 ; CHECK-LABEL: splat_v8i8:
16 ; CHECK-NEXT: dup v0.8b, w0
18 %insert = insertelement <8 x i8> undef, i8 %a, i64 0
19 %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer
23 ; Don't use SVE for 128-bit vectors.
; Builds a <16 x i8> splat of %a (insertelement into lane 0 followed by an
; all-zero shuffle mask). The expected code is a single Advanced SIMD
; "dup v0.16b, w0".
24 define <16 x i8> @splat_v16i8(i8 %a) vscale_range(2,0) #0 {
25 ; CHECK-LABEL: splat_v16i8:
27 ; CHECK-NEXT: dup v0.16b, w0
29 %insert = insertelement <16 x i8> undef, i8 %a, i64 0
30 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; Splats %a across <32 x i8> (256 bits) and stores the result to %b.
; vscale_range(2,0) gives vscale >= 2 with no upper bound, and the expected
; code uses one SVE register: "mov z0.b, w0", a vl32 predicate and a single
; st1b.
34 define void @splat_v32i8(i8 %a, ptr %b) vscale_range(2,0) #0 {
35 ; CHECK-LABEL: splat_v32i8:
37 ; CHECK-NEXT: mov z0.b, w0
38 ; CHECK-NEXT: ptrue p0.b, vl32
39 ; CHECK-NEXT: st1b { z0.b }, p0, [x1]
41 %insert = insertelement <32 x i8> undef, i8 %a, i64 0
42 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
43 store <32 x i8> %splat, ptr %b
; Splats %a across <64 x i8> (512 bits) and stores the result to %b. There is
; no vscale_range attribute, so the expected code depends on the vector-length
; option given on the llc command line: the VBITS_GE_256 expectations use two
; vl32 stores (first at byte offset 32 held in w8/x8, then at the base), while
; the VBITS_GE_512 expectations use a single vl64 store.
47 define void @splat_v64i8(i8 %a, ptr %b) #0 {
48 ; VBITS_GE_256-LABEL: splat_v64i8:
49 ; VBITS_GE_256: // %bb.0:
50 ; VBITS_GE_256-NEXT: mov z0.b, w0
51 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
52 ; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
53 ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x8]
54 ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1]
55 ; VBITS_GE_256-NEXT: ret
57 ; VBITS_GE_512-LABEL: splat_v64i8:
58 ; VBITS_GE_512: // %bb.0:
59 ; VBITS_GE_512-NEXT: mov z0.b, w0
60 ; VBITS_GE_512-NEXT: ptrue p0.b, vl64
61 ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x1]
62 ; VBITS_GE_512-NEXT: ret
63 %insert = insertelement <64 x i8> undef, i8 %a, i64 0
64 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
65 store <64 x i8> %splat, ptr %b
; Splats %a across <128 x i8> (1024 bits) and stores the result to %b.
; vscale_range(8,0) gives vscale >= 8, and the expected code is one
; "mov z0.b, w0" plus a single vl128-predicated st1b.
69 define void @splat_v128i8(i8 %a, ptr %b) vscale_range(8,0) #0 {
70 ; CHECK-LABEL: splat_v128i8:
72 ; CHECK-NEXT: mov z0.b, w0
73 ; CHECK-NEXT: ptrue p0.b, vl128
74 ; CHECK-NEXT: st1b { z0.b }, p0, [x1]
76 %insert = insertelement <128 x i8> undef, i8 %a, i64 0
77 %splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
78 store <128 x i8> %splat, ptr %b
; Splats %a across <256 x i8> (2048 bits) and stores the result to %b.
; vscale_range(16,0) gives vscale >= 16, and the expected code is one
; "mov z0.b, w0" plus a single vl256-predicated st1b.
82 define void @splat_v256i8(i8 %a, ptr %b) vscale_range(16,0) #0 {
83 ; CHECK-LABEL: splat_v256i8:
85 ; CHECK-NEXT: mov z0.b, w0
86 ; CHECK-NEXT: ptrue p0.b, vl256
87 ; CHECK-NEXT: st1b { z0.b }, p0, [x1]
89 %insert = insertelement <256 x i8> undef, i8 %a, i64 0
90 %splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
91 store <256 x i8> %splat, ptr %b
95 ; Don't use SVE for 64-bit vectors.
; Builds a <4 x i16> splat of %a (insertelement into lane 0 followed by an
; all-zero shuffle mask). The expected code is a single Advanced SIMD
; "dup v0.4h, w0".
96 define <4 x i16> @splat_v4i16(i16 %a) vscale_range(2,0) #0 {
97 ; CHECK-LABEL: splat_v4i16:
99 ; CHECK-NEXT: dup v0.4h, w0
101 %insert = insertelement <4 x i16> undef, i16 %a, i64 0
102 %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
106 ; Don't use SVE for 128-bit vectors.
; Builds an <8 x i16> splat of %a (insertelement into lane 0 followed by an
; all-zero shuffle mask). The expected code is a single Advanced SIMD
; "dup v0.8h, w0".
107 define <8 x i16> @splat_v8i16(i16 %a) vscale_range(2,0) #0 {
108 ; CHECK-LABEL: splat_v8i16:
110 ; CHECK-NEXT: dup v0.8h, w0
112 %insert = insertelement <8 x i16> undef, i16 %a, i64 0
113 %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; Splats %a across <16 x i16> (256 bits) and stores the result to %b.
; vscale_range(2,0) gives vscale >= 2, and the expected code is one
; "mov z0.h, w0" plus a single vl16-predicated st1h.
117 define void @splat_v16i16(i16 %a, ptr %b) vscale_range(2,0) #0 {
118 ; CHECK-LABEL: splat_v16i16:
120 ; CHECK-NEXT: mov z0.h, w0
121 ; CHECK-NEXT: ptrue p0.h, vl16
122 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
124 %insert = insertelement <16 x i16> undef, i16 %a, i64 0
125 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
126 store <16 x i16> %splat, ptr %b
; Splats %a across <32 x i16> (512 bits) and stores the result to %b. There is
; no vscale_range attribute, so the expected code depends on the vector-length
; option given on the llc command line: the VBITS_GE_256 expectations use two
; vl16 stores (first at element index 16 via "x8, lsl #1", then at the base),
; while the VBITS_GE_512 expectations use a single vl32 store.
130 define void @splat_v32i16(i16 %a, ptr %b) #0 {
131 ; VBITS_GE_256-LABEL: splat_v32i16:
132 ; VBITS_GE_256: // %bb.0:
133 ; VBITS_GE_256-NEXT: mov z0.h, w0
134 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
135 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
136 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
137 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
138 ; VBITS_GE_256-NEXT: ret
140 ; VBITS_GE_512-LABEL: splat_v32i16:
141 ; VBITS_GE_512: // %bb.0:
142 ; VBITS_GE_512-NEXT: mov z0.h, w0
143 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
144 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
145 ; VBITS_GE_512-NEXT: ret
146 %insert = insertelement <32 x i16> undef, i16 %a, i64 0
147 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
148 store <32 x i16> %splat, ptr %b
; Splats %a across <64 x i16> (1024 bits) and stores the result to %b.
; vscale_range(8,0) gives vscale >= 8, and the expected code is one
; "mov z0.h, w0" plus a single vl64-predicated st1h.
152 define void @splat_v64i16(i16 %a, ptr %b) vscale_range(8,0) #0 {
153 ; CHECK-LABEL: splat_v64i16:
155 ; CHECK-NEXT: mov z0.h, w0
156 ; CHECK-NEXT: ptrue p0.h, vl64
157 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
159 %insert = insertelement <64 x i16> undef, i16 %a, i64 0
160 %splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
161 store <64 x i16> %splat, ptr %b
; Splats %a across <128 x i16> (2048 bits) and stores the result to %b.
; vscale_range(16,0) gives vscale >= 16, and the expected code is one
; "mov z0.h, w0" plus a single vl128-predicated st1h.
165 define void @splat_v128i16(i16 %a, ptr %b) vscale_range(16,0) #0 {
166 ; CHECK-LABEL: splat_v128i16:
168 ; CHECK-NEXT: mov z0.h, w0
169 ; CHECK-NEXT: ptrue p0.h, vl128
170 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
172 %insert = insertelement <128 x i16> undef, i16 %a, i64 0
173 %splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
174 store <128 x i16> %splat, ptr %b
178 ; Don't use SVE for 64-bit vectors.
; Builds a <2 x i32> splat of %a (insertelement into lane 0 followed by an
; all-zero shuffle mask). The expected code is a single Advanced SIMD
; "dup v0.2s, w0".
179 define <2 x i32> @splat_v2i32(i32 %a) vscale_range(2,0) #0 {
180 ; CHECK-LABEL: splat_v2i32:
182 ; CHECK-NEXT: dup v0.2s, w0
184 %insert = insertelement <2 x i32> undef, i32 %a, i64 0
185 %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer
189 ; Don't use SVE for 128-bit vectors.
; Builds a <4 x i32> splat of %a (insertelement into lane 0 followed by an
; all-zero shuffle mask). The expected code is a single Advanced SIMD
; "dup v0.4s, w0".
190 define <4 x i32> @splat_v4i32(i32 %a) vscale_range(2,0) #0 {
191 ; CHECK-LABEL: splat_v4i32:
193 ; CHECK-NEXT: dup v0.4s, w0
195 %insert = insertelement <4 x i32> undef, i32 %a, i64 0
196 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
; Splats %a across <8 x i32> (256 bits) and stores the result to %b.
; vscale_range(2,0) gives vscale >= 2, and the expected code is one
; "mov z0.s, w0" plus a single vl8-predicated st1w.
200 define void @splat_v8i32(i32 %a, ptr %b) vscale_range(2,0) #0 {
201 ; CHECK-LABEL: splat_v8i32:
203 ; CHECK-NEXT: mov z0.s, w0
204 ; CHECK-NEXT: ptrue p0.s, vl8
205 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
207 %insert = insertelement <8 x i32> undef, i32 %a, i64 0
208 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
209 store <8 x i32> %splat, ptr %b
; Splats %a across <16 x i32> (512 bits) and stores the result to %b. There is
; no vscale_range attribute, so the expected code depends on the vector-length
; option given on the llc command line: the VBITS_GE_256 expectations use two
; vl8 stores (first at element index 8 via "x8, lsl #2", then at the base),
; while the VBITS_GE_512 expectations use a single vl16 store.
213 define void @splat_v16i32(i32 %a, ptr %b) #0 {
214 ; VBITS_GE_256-LABEL: splat_v16i32:
215 ; VBITS_GE_256: // %bb.0:
216 ; VBITS_GE_256-NEXT: mov z0.s, w0
217 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
218 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
219 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
220 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
221 ; VBITS_GE_256-NEXT: ret
223 ; VBITS_GE_512-LABEL: splat_v16i32:
224 ; VBITS_GE_512: // %bb.0:
225 ; VBITS_GE_512-NEXT: mov z0.s, w0
226 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
227 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
228 ; VBITS_GE_512-NEXT: ret
229 %insert = insertelement <16 x i32> undef, i32 %a, i64 0
230 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
231 store <16 x i32> %splat, ptr %b
; Splats %a across <32 x i32> (1024 bits) and stores the result to %b.
; vscale_range(8,0) gives vscale >= 8, and the expected code is one
; "mov z0.s, w0" plus a single vl32-predicated st1w.
235 define void @splat_v32i32(i32 %a, ptr %b) vscale_range(8,0) #0 {
236 ; CHECK-LABEL: splat_v32i32:
238 ; CHECK-NEXT: mov z0.s, w0
239 ; CHECK-NEXT: ptrue p0.s, vl32
240 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
242 %insert = insertelement <32 x i32> undef, i32 %a, i64 0
243 %splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
244 store <32 x i32> %splat, ptr %b
; Splats %a across <64 x i32> (2048 bits) and stores the result to %b.
; vscale_range(16,0) gives vscale >= 16, and the expected code is one
; "mov z0.s, w0" plus a single vl64-predicated st1w.
248 define void @splat_v64i32(i32 %a, ptr %b) vscale_range(16,0) #0 {
249 ; CHECK-LABEL: splat_v64i32:
251 ; CHECK-NEXT: mov z0.s, w0
252 ; CHECK-NEXT: ptrue p0.s, vl64
253 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
255 %insert = insertelement <64 x i32> undef, i32 %a, i64 0
256 %splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
257 store <64 x i32> %splat, ptr %b
261 ; Don't use SVE for 64-bit vectors.
; Single-lane case: builds a <1 x i64> "splat" of %a. With only one element
; the expected code is just a general-purpose to FP/SIMD register move,
; "fmov d0, x0".
262 define <1 x i64> @splat_v1i64(i64 %a) vscale_range(2,0) #0 {
263 ; CHECK-LABEL: splat_v1i64:
265 ; CHECK-NEXT: fmov d0, x0
267 %insert = insertelement <1 x i64> undef, i64 %a, i64 0
268 %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer
272 ; Don't use SVE for 128-bit vectors.
; Builds a <2 x i64> splat of %a (insertelement into lane 0 followed by an
; all-zero shuffle mask). The expected code is a single Advanced SIMD
; "dup v0.2d, x0".
273 define <2 x i64> @splat_v2i64(i64 %a) vscale_range(2,0) #0 {
274 ; CHECK-LABEL: splat_v2i64:
276 ; CHECK-NEXT: dup v0.2d, x0
278 %insert = insertelement <2 x i64> undef, i64 %a, i64 0
279 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
; Splats %a across <4 x i64> (256 bits) and stores the result to %b.
; vscale_range(2,0) gives vscale >= 2, and the expected code is one
; "mov z0.d, x0" plus a single vl4-predicated st1d.
283 define void @splat_v4i64(i64 %a, ptr %b) vscale_range(2,0) #0 {
284 ; CHECK-LABEL: splat_v4i64:
286 ; CHECK-NEXT: mov z0.d, x0
287 ; CHECK-NEXT: ptrue p0.d, vl4
288 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
290 %insert = insertelement <4 x i64> undef, i64 %a, i64 0
291 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
292 store <4 x i64> %splat, ptr %b
; Splats %a across <8 x i64> (512 bits) and stores the result to %b. There is
; no vscale_range attribute, so the expected code depends on the vector-length
; option given on the llc command line: the VBITS_GE_256 expectations use two
; vl4 stores (first at element index 4 via "x8, lsl #3", then at the base),
; while the VBITS_GE_512 expectations use a single vl8 store.
296 define void @splat_v8i64(i64 %a, ptr %b) #0 {
297 ; VBITS_GE_256-LABEL: splat_v8i64:
298 ; VBITS_GE_256: // %bb.0:
299 ; VBITS_GE_256-NEXT: mov z0.d, x0
300 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
301 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
302 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
303 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
304 ; VBITS_GE_256-NEXT: ret
306 ; VBITS_GE_512-LABEL: splat_v8i64:
307 ; VBITS_GE_512: // %bb.0:
308 ; VBITS_GE_512-NEXT: mov z0.d, x0
309 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
310 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
311 ; VBITS_GE_512-NEXT: ret
312 %insert = insertelement <8 x i64> undef, i64 %a, i64 0
313 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
314 store <8 x i64> %splat, ptr %b
; Splats %a across <16 x i64> (1024 bits) and stores the result to %b.
; vscale_range(8,0) gives vscale >= 8, and the expected code is one
; "mov z0.d, x0" plus a single vl16-predicated st1d.
318 define void @splat_v16i64(i64 %a, ptr %b) vscale_range(8,0) #0 {
319 ; CHECK-LABEL: splat_v16i64:
321 ; CHECK-NEXT: mov z0.d, x0
322 ; CHECK-NEXT: ptrue p0.d, vl16
323 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
325 %insert = insertelement <16 x i64> undef, i64 %a, i64 0
326 %splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
327 store <16 x i64> %splat, ptr %b
; Splats %a across <32 x i64> (2048 bits) and stores the result to %b.
; vscale_range(16,0) gives vscale >= 16, and the expected code is one
; "mov z0.d, x0" plus a single vl32-predicated st1d.
331 define void @splat_v32i64(i64 %a, ptr %b) vscale_range(16,0) #0 {
332 ; CHECK-LABEL: splat_v32i64:
334 ; CHECK-NEXT: mov z0.d, x0
335 ; CHECK-NEXT: ptrue p0.d, vl32
336 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
338 %insert = insertelement <32 x i64> undef, i64 %a, i64 0
339 %splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
340 store <32 x i64> %splat, ptr %b
345 ; DUP (floating-point)
348 ; Don't use SVE for 64-bit vectors.
; Returns a <4 x half> splat of %a. The scalar arrives in h0, so the expected
; code broadcasts lane 0 of the same register: "dup v0.4h, v0.h[0]".
349 define <4 x half> @splat_v4f16(half %a) vscale_range(2,0) #0 {
350 ; CHECK-LABEL: splat_v4f16:
352 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
353 ; CHECK-NEXT: dup v0.4h, v0.h[0]
355 %insert = insertelement <4 x half> undef, half %a, i64 0
356 %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer
357 ret <4 x half> %splat
360 ; Don't use SVE for 128-bit vectors.
; Returns an <8 x half> splat of %a. The scalar arrives in h0, so the expected
; code broadcasts lane 0 of the same register: "dup v0.8h, v0.h[0]".
361 define <8 x half> @splat_v8f16(half %a) vscale_range(2,0) #0 {
362 ; CHECK-LABEL: splat_v8f16:
364 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
365 ; CHECK-NEXT: dup v0.8h, v0.h[0]
367 %insert = insertelement <8 x half> undef, half %a, i64 0
368 %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer
369 ret <8 x half> %splat
; Splats %a across <16 x half> (256 bits) and stores the result to %b.
; In the expected code the scalar is in h0 and the pointer in x0: the value is
; broadcast with "mov z0.h, h0" and written with a single vl16-predicated st1h.
372 define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 {
373 ; CHECK-LABEL: splat_v16f16:
375 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
376 ; CHECK-NEXT: ptrue p0.h, vl16
377 ; CHECK-NEXT: mov z0.h, h0
378 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
380 %insert = insertelement <16 x half> undef, half %a, i64 0
381 %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
382 store <16 x half> %splat, ptr %b
; Splats %a across <32 x half> (512 bits) and stores the result to %b. There
; is no vscale_range attribute, so the expected code depends on the
; vector-length option given on the llc command line: the VBITS_GE_256
; expectations use two vl16 stores (first at element index 16 via
; "x8, lsl #1", then at the base), while the VBITS_GE_512 expectations use a
; single vl32 store. Both broadcast the scalar with "mov z0.h, h0".
386 define void @splat_v32f16(half %a, ptr %b) #0 {
387 ; VBITS_GE_256-LABEL: splat_v32f16:
388 ; VBITS_GE_256: // %bb.0:
389 ; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0
390 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
391 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
392 ; VBITS_GE_256-NEXT: mov z0.h, h0
393 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
394 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0]
395 ; VBITS_GE_256-NEXT: ret
397 ; VBITS_GE_512-LABEL: splat_v32f16:
398 ; VBITS_GE_512: // %bb.0:
399 ; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 def $z0
400 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
401 ; VBITS_GE_512-NEXT: mov z0.h, h0
402 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
403 ; VBITS_GE_512-NEXT: ret
404 %insert = insertelement <32 x half> undef, half %a, i64 0
405 %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
406 store <32 x half> %splat, ptr %b
; Splats %a across <64 x half> (1024 bits) and stores the result to %b.
; vscale_range(8,0) gives vscale >= 8; the expected code broadcasts with
; "mov z0.h, h0" and writes with a single vl64-predicated st1h.
410 define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 {
411 ; CHECK-LABEL: splat_v64f16:
413 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
414 ; CHECK-NEXT: ptrue p0.h, vl64
415 ; CHECK-NEXT: mov z0.h, h0
416 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
418 %insert = insertelement <64 x half> undef, half %a, i64 0
419 %splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
420 store <64 x half> %splat, ptr %b
; Splats %a across <128 x half> (2048 bits) and stores the result to %b.
; vscale_range(16,0) gives vscale >= 16; the expected code broadcasts with
; "mov z0.h, h0" and writes with a single vl128-predicated st1h.
424 define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 {
425 ; CHECK-LABEL: splat_v128f16:
427 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
428 ; CHECK-NEXT: ptrue p0.h, vl128
429 ; CHECK-NEXT: mov z0.h, h0
430 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
432 %insert = insertelement <128 x half> undef, half %a, i64 0
433 %splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
434 store <128 x half> %splat, ptr %b
438 ; Don't use SVE for 64-bit vectors.
; Returns a <2 x float> splat of %a; %op2 is not used by the body shown. The
; scalar arrives in s0, so the expected code is "dup v0.2s, v0.s[0]".
439 define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) vscale_range(2,0) #0 {
440 ; CHECK-LABEL: splat_v2f32:
442 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
443 ; CHECK-NEXT: dup v0.2s, v0.s[0]
445 %insert = insertelement <2 x float> undef, float %a, i64 0
446 %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer
447 ret <2 x float> %splat
450 ; Don't use SVE for 128-bit vectors.
; Returns a <4 x float> splat of %a; %op2 is not used by the body shown. The
; scalar arrives in s0, so the expected code is "dup v0.4s, v0.s[0]".
451 define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) vscale_range(2,0) #0 {
452 ; CHECK-LABEL: splat_v4f32:
454 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
455 ; CHECK-NEXT: dup v0.4s, v0.s[0]
457 %insert = insertelement <4 x float> undef, float %a, i64 0
458 %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
459 ret <4 x float> %splat
; Splats %a across <8 x float> (256 bits) and stores the result to %b.
; In the expected code the scalar is in s0 and the pointer in x0: the value is
; broadcast with "mov z0.s, s0" and written with a single vl8-predicated st1w.
462 define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 {
463 ; CHECK-LABEL: splat_v8f32:
465 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
466 ; CHECK-NEXT: ptrue p0.s, vl8
467 ; CHECK-NEXT: mov z0.s, s0
468 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
470 %insert = insertelement <8 x float> undef, float %a, i64 0
471 %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
472 store <8 x float> %splat, ptr %b
; Splats %a across <16 x float> (512 bits) and stores the result to %b. There
; is no vscale_range attribute, so the expected code depends on the
; vector-length option given on the llc command line: the VBITS_GE_256
; expectations use two vl8 stores (first at element index 8 via "x8, lsl #2",
; then at the base), while the VBITS_GE_512 expectations use a single vl16
; store. Both broadcast the scalar with "mov z0.s, s0".
476 define void @splat_v16f32(float %a, ptr %b) #0 {
477 ; VBITS_GE_256-LABEL: splat_v16f32:
478 ; VBITS_GE_256: // %bb.0:
479 ; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0
480 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
481 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
482 ; VBITS_GE_256-NEXT: mov z0.s, s0
483 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
484 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0]
485 ; VBITS_GE_256-NEXT: ret
487 ; VBITS_GE_512-LABEL: splat_v16f32:
488 ; VBITS_GE_512: // %bb.0:
489 ; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 def $z0
490 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
491 ; VBITS_GE_512-NEXT: mov z0.s, s0
492 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
493 ; VBITS_GE_512-NEXT: ret
494 %insert = insertelement <16 x float> undef, float %a, i64 0
495 %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
496 store <16 x float> %splat, ptr %b
; Splats %a across <32 x float> (1024 bits) and stores the result to %b.
; vscale_range(8,0) gives vscale >= 8; the expected code broadcasts with
; "mov z0.s, s0" and writes with a single vl32-predicated st1w.
500 define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 {
501 ; CHECK-LABEL: splat_v32f32:
503 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
504 ; CHECK-NEXT: ptrue p0.s, vl32
505 ; CHECK-NEXT: mov z0.s, s0
506 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
508 %insert = insertelement <32 x float> undef, float %a, i64 0
509 %splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
510 store <32 x float> %splat, ptr %b
; Splats %a across <64 x float> (2048 bits) and stores the result to %b.
; vscale_range(16,0) gives vscale >= 16; the expected code broadcasts with
; "mov z0.s, s0" and writes with a single vl64-predicated st1w.
514 define void @splat_v64f32(float %a, ptr %b) vscale_range(16,0) #0 {
515 ; CHECK-LABEL: splat_v64f32:
517 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
518 ; CHECK-NEXT: ptrue p0.s, vl64
519 ; CHECK-NEXT: mov z0.s, s0
520 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
522 %insert = insertelement <64 x float> undef, float %a, i64 0
523 %splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
524 store <64 x float> %splat, ptr %b
528 ; Don't use SVE for 64-bit vectors.
; Single-lane case: returns a <1 x double> "splat" of %a. %op2 is not used by
; the body shown.
529 define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) vscale_range(2,0) #0 {
530 ; CHECK-LABEL: splat_v1f64:
533 %insert = insertelement <1 x double> undef, double %a, i64 0
534 %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer
535 ret <1 x double> %splat
538 ; Don't use SVE for 128-bit vectors.
; Returns a <2 x double> splat of %a; %op2 is not used by the body shown. The
; scalar arrives in d0, so the expected code is "dup v0.2d, v0.d[0]".
539 define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) vscale_range(2,0) #0 {
540 ; CHECK-LABEL: splat_v2f64:
542 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
543 ; CHECK-NEXT: dup v0.2d, v0.d[0]
545 %insert = insertelement <2 x double> undef, double %a, i64 0
546 %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
547 ret <2 x double> %splat
; Splats %a across <4 x double> (256 bits) and stores the result to %b.
; In the expected code the scalar is in d0 and the pointer in x0: the value is
; broadcast with "mov z0.d, d0" and written with a single vl4-predicated st1d.
550 define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 {
551 ; CHECK-LABEL: splat_v4f64:
553 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
554 ; CHECK-NEXT: ptrue p0.d, vl4
555 ; CHECK-NEXT: mov z0.d, d0
556 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
558 %insert = insertelement <4 x double> undef, double %a, i64 0
559 %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
560 store <4 x double> %splat, ptr %b
; Splats %a across <8 x double> (512 bits) and stores the result to %b. There
; is no vscale_range attribute, so the expected code depends on the
; vector-length option given on the llc command line: the VBITS_GE_256
; expectations use two vl4 stores (first at element index 4 via "x8, lsl #3",
; then at the base), while the VBITS_GE_512 expectations use a single vl8
; store. Both broadcast the scalar with "mov z0.d, d0".
564 define void @splat_v8f64(double %a, ptr %b) #0 {
565 ; VBITS_GE_256-LABEL: splat_v8f64:
566 ; VBITS_GE_256: // %bb.0:
567 ; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0
568 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
569 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
570 ; VBITS_GE_256-NEXT: mov z0.d, d0
571 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
572 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
573 ; VBITS_GE_256-NEXT: ret
575 ; VBITS_GE_512-LABEL: splat_v8f64:
576 ; VBITS_GE_512: // %bb.0:
577 ; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 def $z0
578 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
579 ; VBITS_GE_512-NEXT: mov z0.d, d0
580 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
581 ; VBITS_GE_512-NEXT: ret
582 %insert = insertelement <8 x double> undef, double %a, i64 0
583 %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
584 store <8 x double> %splat, ptr %b
; Splats %a across <16 x double> (1024 bits) and stores the result to %b.
; vscale_range(8,0) gives vscale >= 8; the expected code broadcasts with
; "mov z0.d, d0" and writes with a single vl16-predicated st1d.
588 define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 {
589 ; CHECK-LABEL: splat_v16f64:
591 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
592 ; CHECK-NEXT: ptrue p0.d, vl16
593 ; CHECK-NEXT: mov z0.d, d0
594 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
596 %insert = insertelement <16 x double> undef, double %a, i64 0
597 %splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
598 store <16 x double> %splat, ptr %b
; Splats %a across <32 x double> (2048 bits) and stores the result to %b.
; vscale_range(16,0) gives vscale >= 16; the expected code broadcasts with
; "mov z0.d, d0" and writes with a single vl32-predicated st1d.
602 define void @splat_v32f64(double %a, ptr %b) vscale_range(16,0) #0 {
603 ; CHECK-LABEL: splat_v32f64:
605 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
606 ; CHECK-NEXT: ptrue p0.d, vl32
607 ; CHECK-NEXT: mov z0.d, d0
608 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
610 %insert = insertelement <32 x double> undef, double %a, i64 0
611 %splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
612 store <32 x double> %splat, ptr %b
617 ; DUP (integer immediate)
; Stores a <64 x i8> splat of the constant 1 to %a. vscale_range(4,0) gives
; vscale >= 4; the expected code materialises the constant with the immediate
; form "mov z0.b, #1" and writes it with a single vl64-predicated st1b.
620 define void @splat_imm_v64i8(ptr %a) vscale_range(4,0) #0 {
621 ; CHECK-LABEL: splat_imm_v64i8:
623 ; CHECK-NEXT: mov z0.b, #1 // =0x1
624 ; CHECK-NEXT: ptrue p0.b, vl64
625 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
627 %insert = insertelement <64 x i8> undef, i8 1, i64 0
628 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
629 store <64 x i8> %splat, ptr %a
; Stores a <32 x i16> splat of the constant 2 to %a. vscale_range(4,0) gives
; vscale >= 4; the expected code materialises the constant with the immediate
; form "mov z0.h, #2" and writes it with a single vl32-predicated st1h.
633 define void @splat_imm_v32i16(ptr %a) vscale_range(4,0) #0 {
634 ; CHECK-LABEL: splat_imm_v32i16:
636 ; CHECK-NEXT: mov z0.h, #2 // =0x2
637 ; CHECK-NEXT: ptrue p0.h, vl32
638 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
640 %insert = insertelement <32 x i16> undef, i16 2, i64 0
641 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
642 store <32 x i16> %splat, ptr %a
; Stores a <16 x i32> splat of the constant 3 to %a. vscale_range(4,0) gives
; vscale >= 4; the expected code materialises the constant with the immediate
; form "mov z0.s, #3" and writes it with a single vl16-predicated st1w.
646 define void @splat_imm_v16i32(ptr %a) vscale_range(4,0) #0 {
647 ; CHECK-LABEL: splat_imm_v16i32:
649 ; CHECK-NEXT: mov z0.s, #3 // =0x3
650 ; CHECK-NEXT: ptrue p0.s, vl16
651 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
653 %insert = insertelement <16 x i32> undef, i32 3, i64 0
654 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
655 store <16 x i32> %splat, ptr %a
; Stores an <8 x i64> splat of the constant 4 to %a. vscale_range(4,0) gives
; vscale >= 4; the expected code materialises the constant with the immediate
; form "mov z0.d, #4" and writes it with a single vl8-predicated st1d.
659 define void @splat_imm_v8i64(ptr %a) vscale_range(4,0) #0 {
660 ; CHECK-LABEL: splat_imm_v8i64:
662 ; CHECK-NEXT: mov z0.d, #4 // =0x4
663 ; CHECK-NEXT: ptrue p0.d, vl8
664 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
666 %insert = insertelement <8 x i64> undef, i64 4, i64 0
667 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
668 store <8 x i64> %splat, ptr %a
673 ; DUP (floating-point immediate)
; Stores a <32 x half> splat of the constant 5.0 to %a. vscale_range(4,0)
; gives vscale >= 4; the expected code materialises the constant with
; "fmov z0.h, #5.00000000" and writes it with a single vl32-predicated st1h.
676 define void @splat_imm_v32f16(ptr %a) vscale_range(4,0) #0 {
677 ; CHECK-LABEL: splat_imm_v32f16:
679 ; CHECK-NEXT: fmov z0.h, #5.00000000
680 ; CHECK-NEXT: ptrue p0.h, vl32
681 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
683 %insert = insertelement <32 x half> undef, half 5.0, i64 0
684 %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
685 store <32 x half> %splat, ptr %a
; Stores a <16 x float> splat of the constant 6.0 to %a. vscale_range(4,0)
; gives vscale >= 4; the expected code materialises the constant with
; "fmov z0.s, #6.00000000" and writes it with a single vl16-predicated st1w.
689 define void @splat_imm_v16f32(ptr %a) vscale_range(4,0) #0 {
690 ; CHECK-LABEL: splat_imm_v16f32:
692 ; CHECK-NEXT: fmov z0.s, #6.00000000
693 ; CHECK-NEXT: ptrue p0.s, vl16
694 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
696 %insert = insertelement <16 x float> undef, float 6.0, i64 0
697 %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
698 store <16 x float> %splat, ptr %a
; Stores an <8 x double> splat of the constant 7.0 to %a. vscale_range(4,0)
; gives vscale >= 4; the expected code materialises the constant with
; "fmov z0.d, #7.00000000" and writes it with a single vl8-predicated st1d.
702 define void @splat_imm_v8f64(ptr %a) vscale_range(4,0) #0 {
703 ; CHECK-LABEL: splat_imm_v8f64:
705 ; CHECK-NEXT: fmov z0.d, #7.00000000
706 ; CHECK-NEXT: ptrue p0.d, vl8
707 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
709 %insert = insertelement <8 x double> undef, double 7.0, i64 0
710 %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
711 store <8 x double> %splat, ptr %a
; Loads an <8 x float> from %a, broadcasts its lane 0 to every lane (all-zero
; shuffle mask) and stores the result to %b. vscale_range(2,2) pins vscale to
; exactly 2. The expected code uses an all-lanes "ptrue p0.s" with no vl
; pattern (presumably because the register width is known exactly) and
; broadcasts element 0 with "mov z0.s, s0".
715 define void @load_splat_v8f32(ptr %a, ptr %b) vscale_range(2,2) #0 {
716 ; CHECK-LABEL: load_splat_v8f32:
718 ; CHECK-NEXT: ptrue p0.s
719 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
720 ; CHECK-NEXT: mov z0.s, s0
721 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
723 %v = load <8 x float>, ptr %a
724 %splat = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> zeroinitializer
725 store <8 x float> %splat, ptr %b
; Loads a <4 x double> from %a, broadcasts its lane 0 to every lane (all-zero
; shuffle mask) and stores the result to %b. vscale_range(2,2) pins vscale to
; exactly 2. The expected code uses an all-lanes "ptrue p0.d" with no vl
; pattern (presumably because the register width is known exactly) and
; broadcasts element 0 with "mov z0.d, d0".
729 define void @load_splat_v4f64(ptr %a, ptr %b) vscale_range(2,2) #0 {
730 ; CHECK-LABEL: load_splat_v4f64:
732 ; CHECK-NEXT: ptrue p0.d
733 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
734 ; CHECK-NEXT: mov z0.d, d0
735 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
737 %v = load <4 x double>, ptr %a
738 %splat = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
739 store <4 x double> %splat, ptr %b
; Loads a <32 x i8> from %a, broadcasts its lane 0 to every lane (all-zero
; shuffle mask) and stores the result to %b. vscale_range(2,2) pins vscale to
; exactly 2. The expected code uses an all-lanes "ptrue p0.b" with no vl
; pattern (presumably because the register width is known exactly) and
; broadcasts element 0 with "mov z0.b, b0".
743 define void @load_splat_v32i8(ptr %a, ptr %b) vscale_range(2,2) #0 {
744 ; CHECK-LABEL: load_splat_v32i8:
746 ; CHECK-NEXT: ptrue p0.b
747 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
748 ; CHECK-NEXT: mov z0.b, b0
749 ; CHECK-NEXT: st1b { z0.b }, p0, [x1]
751 %v = load <32 x i8>, ptr %a
752 %splat = shufflevector <32 x i8> %v, <32 x i8> undef, <32 x i32> zeroinitializer
753 store <32 x i8> %splat, ptr %b
; Loads a <16 x i16> from %a, broadcasts its lane 0 to every lane (all-zero
; shuffle mask) and stores the result to %b. vscale_range(2,2) pins vscale to
; exactly 2. The expected code uses an all-lanes "ptrue p0.h" with no vl
; pattern (presumably because the register width is known exactly) and
; broadcasts element 0 with "mov z0.h, h0".
757 define void @load_splat_v16i16(ptr %a, ptr %b) vscale_range(2,2) #0 {
758 ; CHECK-LABEL: load_splat_v16i16:
760 ; CHECK-NEXT: ptrue p0.h
761 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
762 ; CHECK-NEXT: mov z0.h, h0
763 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
765 %v = load <16 x i16>, ptr %a
766 %splat = shufflevector <16 x i16> %v, <16 x i16> undef, <16 x i32> zeroinitializer
767 store <16 x i16> %splat, ptr %b
; Loads an <8 x i32> from %a, broadcasts its lane 0 to every lane (all-zero
; shuffle mask) and stores the result to %b. vscale_range(2,2) pins vscale to
; exactly 2. The expected code uses an all-lanes "ptrue p0.s" with no vl
; pattern (presumably because the register width is known exactly) and
; broadcasts element 0 with "mov z0.s, s0".
771 define void @load_splat_v8i32(ptr %a, ptr %b) vscale_range(2,2) #0 {
772 ; CHECK-LABEL: load_splat_v8i32:
774 ; CHECK-NEXT: ptrue p0.s
775 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
776 ; CHECK-NEXT: mov z0.s, s0
777 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
779 %v = load <8 x i32>, ptr %a
780 %splat = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> zeroinitializer
781 store <8 x i32> %splat, ptr %b
; Loads a <4 x i64> from %a, broadcasts its lane 0 to every lane (all-zero
; shuffle mask) and stores the result to %b. vscale_range(2,2) pins vscale to
; exactly 2. The expected code uses an all-lanes "ptrue p0.d" with no vl
; pattern (presumably because the register width is known exactly) and
; broadcasts element 0 with "mov z0.d, d0".
785 define void @load_splat_v4i64(ptr %a, ptr %b) vscale_range(2,2) #0 {
786 ; CHECK-LABEL: load_splat_v4i64:
788 ; CHECK-NEXT: ptrue p0.d
789 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
790 ; CHECK-NEXT: mov z0.d, d0
791 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
793 %v = load <4 x i64>, ptr %a
794 %splat = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> zeroinitializer
795 store <4 x i64> %splat, ptr %b
; Attribute group #0, referenced by every function in this file, enables the
; SVE target feature.
799 attributes #0 = { "target-features"="+sve" }