1 ; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefix=NO_SVE
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK
4 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5 ; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 ; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
7 ; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
; The llc invocations at the top of the file pass no triple option, so this
; module-level triple is what selects the AArch64 Linux target.
18 target triple = "aarch64-unknown-linux-gnu"
20 ; Don't use SVE when its registers are no bigger than NEON.
27 ; Don't use SVE for 64-bit vectors.
; Splats the i8 argument %a across an <8 x i8> vector (insertelement into lane 0
; followed by a shuffle with an all-zero mask). Configurations of 256 bits and
; wider expect a NEON dup from w0 into v0.8b.
28 define <8 x i8> @splat_v8i8(i8 %a) #0 {
29 ; CHECK-LABEL: splat_v8i8:
30 ; CHECK: dup v0.8b, w0
32 %insert = insertelement <8 x i8> undef, i8 %a, i64 0
33 %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer
37 ; Don't use SVE for 128-bit vectors.
; Splats the i8 argument %a across a <16 x i8> vector. Configurations of 256
; bits and wider expect a NEON dup from w0 into v0.16b.
38 define <16 x i8> @splat_v16i8(i8 %a) #0 {
39 ; CHECK-LABEL: splat_v16i8:
40 ; CHECK: dup v0.16b, w0
42 %insert = insertelement <16 x i8> undef, i8 %a, i64 0
43 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; Splats %a across a 256-bit <32 x i8> vector and stores it through %b.
; Configurations of 256 bits and wider expect an SVE byte broadcast from w0 and
; a vl32 predicate (in either order), immediately followed by one st1b to [x1].
47 define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 {
48 ; CHECK-LABEL: splat_v32i8:
49 ; CHECK-DAG: mov [[RES:z[0-9]+]].b, w0
50 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].b, vl32
51 ; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
53 %insert = insertelement <32 x i8> undef, i8 %a, i64 0
54 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
55 store <32 x i8> %splat, <32 x i8>* %b
; Splats %a across a 512-bit <64 x i8> vector and stores it through %b.
; With at least 512 bits, a single vl64-predicated st1b is expected. At exactly
; 256 bits, the same splat register is expected to be stored twice under a vl32
; predicate: once at [x1] and once at [x1 + index], where the index register is
; loaded with 32.
59 define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 {
60 ; CHECK-LABEL: splat_v64i8:
61 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, w0
62 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64
63 ; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
64 ; VBITS_GE_512-NEXT: ret
66 ; Ensure sensible type legalisation.
67 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].b, w0
68 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
69 ; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
70 ; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1]
71 ; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[NUMELTS]]]
72 ; VBITS_EQ_256-NEXT: ret
73 %insert = insertelement <64 x i8> undef, i8 %a, i64 0
74 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
75 store <64 x i8> %splat, <64 x i8>* %b
; Splats %a across a 1024-bit <128 x i8> vector and stores it through %b.
; Only configurations of at least 1024 bits check the body (broadcast from w0,
; vl128 predicate, one st1b, ret); narrower ones match just the label.
79 define void @splat_v128i8(i8 %a, <128 x i8>* %b) #0 {
80 ; CHECK-LABEL: splat_v128i8:
81 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].b, w0
82 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].b, vl128
83 ; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
84 ; VBITS_GE_1024-NEXT: ret
85 %insert = insertelement <128 x i8> undef, i8 %a, i64 0
86 %splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
87 store <128 x i8> %splat, <128 x i8>* %b
; Splats %a across a 2048-bit <256 x i8> vector and stores it through %b.
; Only the 2048-bit configuration checks the body (broadcast from w0, vl256
; predicate, one st1b, ret); narrower ones match just the label.
91 define void @splat_v256i8(i8 %a, <256 x i8>* %b) #0 {
92 ; CHECK-LABEL: splat_v256i8:
93 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].b, w0
94 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].b, vl256
95 ; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
96 ; VBITS_GE_2048-NEXT: ret
97 %insert = insertelement <256 x i8> undef, i8 %a, i64 0
98 %splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
99 store <256 x i8> %splat, <256 x i8>* %b
103 ; Don't use SVE for 64-bit vectors.
; Splats the i16 argument %a across a <4 x i16> vector. Configurations of 256
; bits and wider expect a NEON dup from w0 into v0.4h.
104 define <4 x i16> @splat_v4i16(i16 %a) #0 {
105 ; CHECK-LABEL: splat_v4i16:
106 ; CHECK: dup v0.4h, w0
108 %insert = insertelement <4 x i16> undef, i16 %a, i64 0
109 %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
113 ; Don't use SVE for 128-bit vectors.
; Splats the i16 argument %a across an <8 x i16> vector. Configurations of 256
; bits and wider expect a NEON dup from w0 into v0.8h.
114 define <8 x i16> @splat_v8i16(i16 %a) #0 {
115 ; CHECK-LABEL: splat_v8i16:
116 ; CHECK: dup v0.8h, w0
118 %insert = insertelement <8 x i16> undef, i16 %a, i64 0
119 %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
; Splats %a across a 256-bit <16 x i16> vector and stores it through %b.
; Configurations of 256 bits and wider expect an SVE halfword broadcast from w0
; and a vl16 predicate (in either order), then one st1h to [x1].
123 define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 {
124 ; CHECK-LABEL: splat_v16i16:
125 ; CHECK-DAG: mov [[RES:z[0-9]+]].h, w0
126 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16
127 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
129 %insert = insertelement <16 x i16> undef, i16 %a, i64 0
130 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
131 store <16 x i16> %splat, <16 x i16>* %b
; Splats %a across a 512-bit <32 x i16> vector and stores it through %b.
; With at least 512 bits, a single vl32-predicated st1h is expected. At exactly
; 256 bits, the splat register is expected to be stored twice under a vl16
; predicate: at [x1] and at [x1 + index, lsl #1] with the index register set
; to 16.
135 define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 {
136 ; CHECK-LABEL: splat_v32i16:
137 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, w0
138 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
139 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
140 ; VBITS_GE_512-NEXT: ret
142 ; Ensure sensible type legalisation.
143 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, w0
144 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
145 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
146 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1]
147 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
148 ; VBITS_EQ_256-NEXT: ret
149 %insert = insertelement <32 x i16> undef, i16 %a, i64 0
150 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
151 store <32 x i16> %splat, <32 x i16>* %b
; Splats %a across a 1024-bit <64 x i16> vector and stores it through %b.
; Only configurations of at least 1024 bits check the body (broadcast from w0,
; vl64 predicate, one st1h, ret); narrower ones match just the label.
155 define void @splat_v64i16(i16 %a, <64 x i16>* %b) #0 {
156 ; CHECK-LABEL: splat_v64i16:
157 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, w0
158 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64
159 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
160 ; VBITS_GE_1024-NEXT: ret
161 %insert = insertelement <64 x i16> undef, i16 %a, i64 0
162 %splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
163 store <64 x i16> %splat, <64 x i16>* %b
; Splats %a across a 2048-bit <128 x i16> vector and stores it through %b.
; Only the 2048-bit configuration checks the body (broadcast from w0, vl128
; predicate, one st1h, ret); narrower ones match just the label.
167 define void @splat_v128i16(i16 %a, <128 x i16>* %b) #0 {
168 ; CHECK-LABEL: splat_v128i16:
169 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, w0
170 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128
171 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
172 ; VBITS_GE_2048-NEXT: ret
173 %insert = insertelement <128 x i16> undef, i16 %a, i64 0
174 %splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
175 store <128 x i16> %splat, <128 x i16>* %b
179 ; Don't use SVE for 64-bit vectors.
; Splats the i32 argument %a across a <2 x i32> vector. Configurations of 256
; bits and wider expect a NEON dup from w0 into v0.2s.
180 define <2 x i32> @splat_v2i32(i32 %a) #0 {
181 ; CHECK-LABEL: splat_v2i32:
182 ; CHECK: dup v0.2s, w0
184 %insert = insertelement <2 x i32> undef, i32 %a, i64 0
185 %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer
189 ; Don't use SVE for 128-bit vectors.
; Splats the i32 argument %a across a <4 x i32> vector. Configurations of 256
; bits and wider expect a NEON dup from w0 into v0.4s.
190 define <4 x i32> @splat_v4i32(i32 %a) #0 {
191 ; CHECK-LABEL: splat_v4i32:
192 ; CHECK: dup v0.4s, w0
194 %insert = insertelement <4 x i32> undef, i32 %a, i64 0
195 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
; Splats %a across a 256-bit <8 x i32> vector and stores it through %b.
; Configurations of 256 bits and wider expect an SVE word broadcast from w0 and
; a vl8 predicate (in either order), then one st1w to [x1].
199 define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 {
200 ; CHECK-LABEL: splat_v8i32:
201 ; CHECK-DAG: mov [[RES:z[0-9]+]].s, w0
202 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8
203 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
205 %insert = insertelement <8 x i32> undef, i32 %a, i64 0
206 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
207 store <8 x i32> %splat, <8 x i32>* %b
; Splats %a across a 512-bit <16 x i32> vector and stores it through %b.
; With at least 512 bits, a single vl16-predicated st1w is expected. At exactly
; 256 bits, the splat register is expected to be stored twice under a vl8
; predicate: at [x1] and at [x1 + index, lsl #2] with the index register set
; to 8.
211 define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 {
212 ; CHECK-LABEL: splat_v16i32:
213 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, w0
214 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
215 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
216 ; VBITS_GE_512-NEXT: ret
218 ; Ensure sensible type legalisation.
219 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, w0
220 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
221 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
222 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1]
223 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
224 ; VBITS_EQ_256-NEXT: ret
225 %insert = insertelement <16 x i32> undef, i32 %a, i64 0
226 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
227 store <16 x i32> %splat, <16 x i32>* %b
; Splats %a across a 1024-bit <32 x i32> vector and stores it through %b.
; Only configurations of at least 1024 bits check the body (broadcast from w0,
; vl32 predicate, one st1w, ret); narrower ones match just the label.
231 define void @splat_v32i32(i32 %a, <32 x i32>* %b) #0 {
232 ; CHECK-LABEL: splat_v32i32:
233 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, w0
234 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32
235 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
236 ; VBITS_GE_1024-NEXT: ret
237 %insert = insertelement <32 x i32> undef, i32 %a, i64 0
238 %splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
239 store <32 x i32> %splat, <32 x i32>* %b
; Splats %a across a 2048-bit <64 x i32> vector and stores it through %b.
; Only the 2048-bit configuration checks the body (broadcast from w0, vl64
; predicate, one st1w, ret); narrower ones match just the label.
243 define void @splat_v64i32(i32 %a, <64 x i32>* %b) #0 {
244 ; CHECK-LABEL: splat_v64i32:
245 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, w0
246 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64
247 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
248 ; VBITS_GE_2048-NEXT: ret
249 %insert = insertelement <64 x i32> undef, i32 %a, i64 0
250 %splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
251 store <64 x i32> %splat, <64 x i32>* %b
255 ; Don't use SVE for 64-bit vectors.
; Single-element case: places the i64 argument %a into a <1 x i64> vector with
; the same insertelement plus zero-mask shuffle idiom as the wider splats.
; No instruction pattern follows the label check here.
256 define <1 x i64> @splat_v1i64(i64 %a) #0 {
257 ; CHECK-LABEL: splat_v1i64:
260 %insert = insertelement <1 x i64> undef, i64 %a, i64 0
261 %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer
265 ; Don't use SVE for 128-bit vectors.
; Splats the i64 argument %a across a <2 x i64> vector. Configurations of 256
; bits and wider expect a NEON dup from x0 into v0.2d.
266 define <2 x i64> @splat_v2i64(i64 %a) #0 {
267 ; CHECK-LABEL: splat_v2i64:
268 ; CHECK: dup v0.2d, x0
270 %insert = insertelement <2 x i64> undef, i64 %a, i64 0
271 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
; Splats %a across a 256-bit <4 x i64> vector and stores it through %b.
; Configurations of 256 bits and wider expect an SVE doubleword broadcast from
; x0 and a vl4 predicate (in either order), then one st1d to [x1].
275 define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 {
276 ; CHECK-LABEL: splat_v4i64:
277 ; CHECK-DAG: mov [[RES:z[0-9]+]].d, x0
278 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4
279 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
281 %insert = insertelement <4 x i64> undef, i64 %a, i64 0
282 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
283 store <4 x i64> %splat, <4 x i64>* %b
; Splats %a across a 512-bit <8 x i64> vector and stores it through %b.
; With at least 512 bits, a single vl8-predicated st1d is expected. At exactly
; 256 bits, the splat register is expected to be stored twice under a vl4
; predicate: at [x1] and at [x1 + index, lsl #3] with the index register set
; to 4.
287 define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 {
288 ; CHECK-LABEL: splat_v8i64:
289 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, x0
290 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
291 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
292 ; VBITS_GE_512-NEXT: ret
294 ; Ensure sensible type legalisation.
295 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, x0
296 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
297 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
298 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1]
299 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
300 ; VBITS_EQ_256-NEXT: ret
301 %insert = insertelement <8 x i64> undef, i64 %a, i64 0
302 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
303 store <8 x i64> %splat, <8 x i64>* %b
; Splats %a across a 1024-bit <16 x i64> vector and stores it through %b.
; Only configurations of at least 1024 bits check the body (broadcast from x0,
; vl16 predicate, one st1d, ret); narrower ones match just the label.
307 define void @splat_v16i64(i64 %a, <16 x i64>* %b) #0 {
308 ; CHECK-LABEL: splat_v16i64:
309 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, x0
310 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16
311 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
312 ; VBITS_GE_1024-NEXT: ret
313 %insert = insertelement <16 x i64> undef, i64 %a, i64 0
314 %splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
315 store <16 x i64> %splat, <16 x i64>* %b
; Splats %a across a 2048-bit <32 x i64> vector and stores it through %b.
; Only the 2048-bit configuration checks the body (broadcast from x0, vl32
; predicate, one st1d, ret); narrower ones match just the label.
319 define void @splat_v32i64(i64 %a, <32 x i64>* %b) #0 {
320 ; CHECK-LABEL: splat_v32i64:
321 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, x0
322 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32
323 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
324 ; VBITS_GE_2048-NEXT: ret
325 %insert = insertelement <32 x i64> undef, i64 %a, i64 0
326 %splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
327 store <32 x i64> %splat, <32 x i64>* %b
332 ; DUP (floating-point)
335 ; Don't use SVE for 64-bit vectors.
; Splats the half argument %a across a <4 x half> vector and returns it.
; Configurations of 256 bits and wider expect a NEON lane dup of v0.h[0] into
; v0.4h.
336 define <4 x half> @splat_v4f16(half %a) #0 {
337 ; CHECK-LABEL: splat_v4f16:
338 ; CHECK: dup v0.4h, v0.h[0]
340 %insert = insertelement <4 x half> undef, half %a, i64 0
341 %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer
342 ret <4 x half> %splat
345 ; Don't use SVE for 128-bit vectors.
; Splats the half argument %a across an <8 x half> vector and returns it.
; Configurations of 256 bits and wider expect a NEON lane dup of v0.h[0] into
; v0.8h.
346 define <8 x half> @splat_v8f16(half %a) #0 {
347 ; CHECK-LABEL: splat_v8f16:
348 ; CHECK: dup v0.8h, v0.h[0]
350 %insert = insertelement <8 x half> undef, half %a, i64 0
351 %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer
352 ret <8 x half> %splat
; Splats the half %a across a 256-bit <16 x half> vector and stores it through
; %b. Configurations of 256 bits and wider expect an SVE halfword broadcast
; from h0 and a vl16 predicate (in either order), then one st1h to [x0].
355 define void @splat_v16f16(half %a, <16 x half>* %b) #0 {
356 ; CHECK-LABEL: splat_v16f16:
357 ; CHECK-DAG: mov [[RES:z[0-9]+]].h, h0
358 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16
359 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
361 %insert = insertelement <16 x half> undef, half %a, i64 0
362 %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
363 store <16 x half> %splat, <16 x half>* %b
; Splats the half %a across a 512-bit <32 x half> vector and stores it through
; %b. With at least 512 bits, a single vl32-predicated st1h is expected. At
; exactly 256 bits, the splat register is expected to be stored twice under a
; vl16 predicate: at [x0] and at [x0 + index, lsl #1] with the index register
; set to 16.
367 define void @splat_v32f16(half %a, <32 x half>* %b) #0 {
368 ; CHECK-LABEL: splat_v32f16:
369 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, h0
370 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
371 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
372 ; VBITS_GE_512-NEXT: ret
374 ; Ensure sensible type legalisation.
375 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, h0
376 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
377 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
378 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0]
379 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
380 ; VBITS_EQ_256-NEXT: ret
381 %insert = insertelement <32 x half> undef, half %a, i64 0
382 %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
383 store <32 x half> %splat, <32 x half>* %b
; Splats the half %a across a 1024-bit <64 x half> vector and stores it through
; %b. Only configurations of at least 1024 bits check the body (broadcast from
; h0, vl64 predicate, one st1h, ret); narrower ones match just the label.
387 define void @splat_v64f16(half %a, <64 x half>* %b) #0 {
388 ; CHECK-LABEL: splat_v64f16:
389 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, h0
390 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64
391 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
392 ; VBITS_GE_1024-NEXT: ret
393 %insert = insertelement <64 x half> undef, half %a, i64 0
394 %splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
395 store <64 x half> %splat, <64 x half>* %b
; Splats the half %a across a 2048-bit <128 x half> vector and stores it
; through %b. Only the 2048-bit configuration checks the body (broadcast from
; h0, vl128 predicate, one st1h, ret); narrower ones match just the label.
399 define void @splat_v128f16(half %a, <128 x half>* %b) #0 {
400 ; CHECK-LABEL: splat_v128f16:
401 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, h0
402 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128
403 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
404 ; VBITS_GE_2048-NEXT: ret
405 %insert = insertelement <128 x half> undef, half %a, i64 0
406 %splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
407 store <128 x half> %splat, <128 x half>* %b
411 ; Don't use SVE for 64-bit vectors.
; Splats the float argument %a across a <2 x float> vector and returns it.
; The second parameter %op2 is never referenced in the body. Configurations of
; 256 bits and wider expect a NEON lane dup of v0.s[0] into v0.2s.
412 define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 {
413 ; CHECK-LABEL: splat_v2f32:
414 ; CHECK: dup v0.2s, v0.s[0]
416 %insert = insertelement <2 x float> undef, float %a, i64 0
417 %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer
418 ret <2 x float> %splat
421 ; Don't use SVE for 128-bit vectors.
; Splats the float argument %a across a <4 x float> vector and returns it.
; The second parameter %op2 is never referenced in the body. Configurations of
; 256 bits and wider expect a NEON lane dup of v0.s[0] into v0.4s.
422 define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 {
423 ; CHECK-LABEL: splat_v4f32:
424 ; CHECK: dup v0.4s, v0.s[0]
426 %insert = insertelement <4 x float> undef, float %a, i64 0
427 %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
428 ret <4 x float> %splat
; Splats the float %a across a 256-bit <8 x float> vector and stores it through
; %b. Configurations of 256 bits and wider expect an SVE word broadcast from s0
; and a vl8 predicate (in either order), then one st1w to [x0].
431 define void @splat_v8f32(float %a, <8 x float>* %b) #0 {
432 ; CHECK-LABEL: splat_v8f32:
433 ; CHECK-DAG: mov [[RES:z[0-9]+]].s, s0
434 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8
435 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
437 %insert = insertelement <8 x float> undef, float %a, i64 0
438 %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
439 store <8 x float> %splat, <8 x float>* %b
; Splats the float %a across a 512-bit <16 x float> vector and stores it
; through %b. With at least 512 bits, a single vl16-predicated st1w is
; expected. At exactly 256 bits, the splat register is expected to be stored
; twice under a vl8 predicate: at [x0] and at [x0 + index, lsl #2] with the
; index register set to 8.
443 define void @splat_v16f32(float %a, <16 x float>* %b) #0 {
444 ; CHECK-LABEL: splat_v16f32:
445 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, s0
446 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
447 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
448 ; VBITS_GE_512-NEXT: ret
450 ; Ensure sensible type legalisation.
451 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, s0
452 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
453 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
454 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0]
455 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
456 ; VBITS_EQ_256-NEXT: ret
457 %insert = insertelement <16 x float> undef, float %a, i64 0
458 %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
459 store <16 x float> %splat, <16 x float>* %b
; Splats the float %a across a 1024-bit <32 x float> vector and stores it
; through %b. Only configurations of at least 1024 bits check the body
; (broadcast from s0, vl32 predicate, one st1w, ret); narrower ones match just
; the label.
463 define void @splat_v32f32(float %a, <32 x float>* %b) #0 {
464 ; CHECK-LABEL: splat_v32f32:
465 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, s0
466 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32
467 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
468 ; VBITS_GE_1024-NEXT: ret
469 %insert = insertelement <32 x float> undef, float %a, i64 0
470 %splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
471 store <32 x float> %splat, <32 x float>* %b
; Splats the float %a across a 2048-bit <64 x float> vector and stores it
; through %b. Only the 2048-bit configuration checks the body (broadcast from
; s0, vl64 predicate, one st1w, ret); narrower ones match just the label.
475 define void @splat_v64f32(float %a, <64 x float>* %b) #0 {
476 ; CHECK-LABEL: splat_v64f32:
477 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, s0
478 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64
479 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
480 ; VBITS_GE_2048-NEXT: ret
481 %insert = insertelement <64 x float> undef, float %a, i64 0
482 %splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
483 store <64 x float> %splat, <64 x float>* %b
487 ; Don't use SVE for 64-bit vectors.
; Single-element case: places the double argument %a into a <1 x double>
; vector and returns it. The second parameter %op2 is never referenced in the
; body. No instruction pattern follows the label check here.
488 define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) #0 {
489 ; CHECK-LABEL: splat_v1f64:
492 %insert = insertelement <1 x double> undef, double %a, i64 0
493 %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer
494 ret <1 x double> %splat
497 ; Don't use SVE for 128-bit vectors.
; Splats the double argument %a across a <2 x double> vector and returns it.
; The second parameter %op2 is never referenced in the body. Configurations of
; 256 bits and wider expect a NEON lane dup of v0.d[0] into v0.2d.
498 define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 {
499 ; CHECK-LABEL: splat_v2f64:
500 ; CHECK: dup v0.2d, v0.d[0]
502 %insert = insertelement <2 x double> undef, double %a, i64 0
503 %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
504 ret <2 x double> %splat
; Splats the double %a across a 256-bit <4 x double> vector and stores it
; through %b. Configurations of 256 bits and wider expect an SVE doubleword
; broadcast from d0 and a vl4 predicate (in either order), then one st1d to
; [x0].
507 define void @splat_v4f64(double %a, <4 x double>* %b) #0 {
508 ; CHECK-LABEL: splat_v4f64:
509 ; CHECK-DAG: mov [[RES:z[0-9]+]].d, d0
510 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4
511 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
513 %insert = insertelement <4 x double> undef, double %a, i64 0
514 %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
515 store <4 x double> %splat, <4 x double>* %b
; Splats the double %a across a 512-bit <8 x double> vector and stores it
; through %b. With at least 512 bits, a single vl8-predicated st1d is expected.
; At exactly 256 bits, the splat register is expected to be stored twice under
; a vl4 predicate: at [x0] and at [x0 + index, lsl #3] with the index register
; set to 4.
519 define void @splat_v8f64(double %a, <8 x double>* %b) #0 {
520 ; CHECK-LABEL: splat_v8f64:
521 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, d0
522 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
523 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
524 ; VBITS_GE_512-NEXT: ret
526 ; Ensure sensible type legalisation.
527 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, d0
528 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
529 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
530 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0]
531 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
532 ; VBITS_EQ_256-NEXT: ret
533 %insert = insertelement <8 x double> undef, double %a, i64 0
534 %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
535 store <8 x double> %splat, <8 x double>* %b
; Splats the double %a across a 1024-bit <16 x double> vector and stores it
; through %b. Only configurations of at least 1024 bits check the body
; (broadcast from d0, vl16 predicate, one st1d, ret); narrower ones match just
; the label.
539 define void @splat_v16f64(double %a, <16 x double>* %b) #0 {
540 ; CHECK-LABEL: splat_v16f64:
541 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, d0
542 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16
543 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
544 ; VBITS_GE_1024-NEXT: ret
545 %insert = insertelement <16 x double> undef, double %a, i64 0
546 %splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
547 store <16 x double> %splat, <16 x double>* %b
; Splats the double %a across a 2048-bit <32 x double> vector and stores it
; through %b. Only the 2048-bit configuration checks the body (broadcast from
; d0, vl32 predicate, one st1d, ret); narrower ones match just the label.
551 define void @splat_v32f64(double %a, <32 x double>* %b) #0 {
552 ; CHECK-LABEL: splat_v32f64:
553 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, d0
554 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32
555 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
556 ; VBITS_GE_2048-NEXT: ret
557 %insert = insertelement <32 x double> undef, double %a, i64 0
558 %splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
559 store <32 x double> %splat, <32 x double>* %b
564 ; DUP (integer immediate)
; Stores a <64 x i8> splat of the constant 1 through %a. Only configurations
; of at least 512 bits check the body: an immediate byte broadcast of #1, a
; vl64 predicate, one st1b to [x0], then ret.
567 define void @splat_imm_v64i8(<64 x i8>* %a) #0 {
568 ; CHECK-LABEL: splat_imm_v64i8:
569 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, #1
570 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64
571 ; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
572 ; VBITS_GE_512-NEXT: ret
573 %insert = insertelement <64 x i8> undef, i8 1, i64 0
574 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
575 store <64 x i8> %splat, <64 x i8>* %a
; Stores a <32 x i16> splat of the constant 2 through %a. Only configurations
; of at least 512 bits check the body: an immediate halfword broadcast of #2,
; a vl32 predicate, one st1h to [x0], then ret.
579 define void @splat_imm_v32i16(<32 x i16>* %a) #0 {
580 ; CHECK-LABEL: splat_imm_v32i16:
581 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, #2
582 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
583 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
584 ; VBITS_GE_512-NEXT: ret
585 %insert = insertelement <32 x i16> undef, i16 2, i64 0
586 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
587 store <32 x i16> %splat, <32 x i16>* %a
; Stores a <16 x i32> splat of the constant 3 through %a. Only configurations
; of at least 512 bits check the body: an immediate word broadcast of #3, a
; vl16 predicate, one st1w to [x0], then ret.
591 define void @splat_imm_v16i32(<16 x i32>* %a) #0 {
592 ; CHECK-LABEL: splat_imm_v16i32:
593 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, #3
594 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
595 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
596 ; VBITS_GE_512-NEXT: ret
597 %insert = insertelement <16 x i32> undef, i32 3, i64 0
598 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
599 store <16 x i32> %splat, <16 x i32>* %a
; Stores an <8 x i64> splat of the constant 4 through %a. Only configurations
; of at least 512 bits check the body: an immediate doubleword broadcast of
; #4, a vl8 predicate, one st1d to [x0], then ret.
603 define void @splat_imm_v8i64(<8 x i64>* %a) #0 {
604 ; CHECK-LABEL: splat_imm_v8i64:
605 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, #4
606 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
607 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
608 ; VBITS_GE_512-NEXT: ret
609 %insert = insertelement <8 x i64> undef, i64 4, i64 0
610 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
611 store <8 x i64> %splat, <8 x i64>* %a
616 ; DUP (floating-point immediate)
; Stores a <32 x half> splat of the constant 5.0 through %a. Only
; configurations of at least 512 bits check the body: an fmov immediate
; broadcast of #5.00000000 into .h lanes, a vl32 predicate, one st1h to [x0],
; then ret.
619 define void @splat_imm_v32f16(<32 x half>* %a) #0 {
620 ; CHECK-LABEL: splat_imm_v32f16:
621 ; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].h, #5.00000000
622 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
623 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
624 ; VBITS_GE_512-NEXT: ret
625 %insert = insertelement <32 x half> undef, half 5.0, i64 0
626 %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
627 store <32 x half> %splat, <32 x half>* %a
; Stores a <16 x float> splat of the constant 6.0 through %a. Only
; configurations of at least 512 bits check the body: an fmov immediate
; broadcast of #6.00000000 into .s lanes, a vl16 predicate, one st1w to [x0],
; then ret.
631 define void @splat_imm_v16f32(<16 x float>* %a) #0 {
632 ; CHECK-LABEL: splat_imm_v16f32:
633 ; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].s, #6.00000000
634 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
635 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
636 ; VBITS_GE_512-NEXT: ret
637 %insert = insertelement <16 x float> undef, float 6.0, i64 0
638 %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
639 store <16 x float> %splat, <16 x float>* %a
; Stores an <8 x double> splat of the constant 7.0 through %a. Only
; configurations of at least 512 bits check the body: an fmov immediate
; broadcast of #7.00000000 into .d lanes, a vl8 predicate, one st1d to [x0],
; then ret.
643 define void @splat_imm_v8f64(<8 x double>* %a) #0 {
644 ; CHECK-LABEL: splat_imm_v8f64:
645 ; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].d, #7.00000000
646 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
647 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
648 ; VBITS_GE_512-NEXT: ret
649 %insert = insertelement <8 x double> undef, double 7.0, i64 0
650 %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
651 store <8 x double> %splat, <8 x double>* %a
; Attribute group referenced as #0 by the function definitions in this file; it
; turns on the SVE target feature for each of them.
654 attributes #0 = { "target-features"="+sve" }