; Fixed-length insertelement lowering for AArch64 SVE.
;
; The first run (128-bit minimum) only verifies that no SVE code is produced.
; Every other run enables the CHECK prefix plus each VBITS_GE_N prefix whose N
; is not larger than the requested minimum vector length, so a function's SVE
; expectations are only verified once a single SVE register can hold the
; whole vector.
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE when its registers are no bigger than NEON.
; Every SVE sequence expected in this file starts with ptrue, so its absence
; from the whole output is used as the "no SVE" signal for the 128-bit run.
; NO_SVE-NOT: ptrue
; Don't use SVE for 64-bit vectors.
; Replaces lane 3 of a <4 x half> argument with 5.0; the expected code is a
; scalar fmov followed by a NEON lane-to-lane mov.
define <4 x half> @insertelement_v4f16(<4 x half> %op1) #0 {
; CHECK-LABEL: insertelement_v4f16:
; CHECK: fmov h1, #5.00000000
; CHECK-NEXT: mov v0.h[3], v1.h[0]
; CHECK-NEXT: ret
  %r = insertelement <4 x half> %op1, half 5.0, i64 3
  ret <4 x half> %r
}
; Don't use SVE for 128-bit vectors.
; Replaces lane 7 of a <8 x half> argument with 5.0; the expected code is a
; scalar fmov followed by a NEON lane-to-lane mov.
define <8 x half> @insertelement_v8f16(<8 x half> %op1) #0 {
; CHECK-LABEL: insertelement_v8f16:
; CHECK: fmov h1, #5.00000000
; CHECK-NEXT: mov v0.h[7], v1.h[0]
; CHECK-NEXT: ret
  %r = insertelement <8 x half> %op1, half 5.0, i64 7
  ret <8 x half> %r
}
; Loads a <16 x half> (256 bits) from %a and replaces lane 15 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_256 prefix; it loads the vector, builds a predicate that is true
; only where the lane index (index #0, #1) equals the splatted constant 15,
; merges 5.0 into that lane and stores the result to [x8].
define <16 x half> @insertelement_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: insertelement_v16f16:
; VBITS_GE_256: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: mov w9, #15
; VBITS_GE_256-NEXT: mov z1.h, w9
; VBITS_GE_256-NEXT: index z2.h, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: cmpeq p1.h, p1/z, z2.h, z1.h
; VBITS_GE_256-NEXT: fmov h1, #5.00000000
; VBITS_GE_256-NEXT: mov z0.h, p1/m, h1
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_256-NEXT: ret
  %op1 = load <16 x half>, <16 x half>* %a
  %r = insertelement <16 x half> %op1, half 5.0, i64 15
  ret <16 x half> %r
}
; Loads a <32 x half> (512 bits) from %a and replaces lane 31 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_512 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 31, merges 5.0 under it and stores the result to [x8].
define <32 x half> @insertelement_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: insertelement_v32f16:
; VBITS_GE_512: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: mov w9, #31
; VBITS_GE_512-NEXT: mov z1.h, w9
; VBITS_GE_512-NEXT: index z2.h, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: cmpeq p1.h, p1/z, z2.h, z1.h
; VBITS_GE_512-NEXT: fmov h1, #5.00000000
; VBITS_GE_512-NEXT: mov z0.h, p1/m, h1
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_512-NEXT: ret
  %op1 = load <32 x half>, <32 x half>* %a
  %r = insertelement <32 x half> %op1, half 5.0, i64 31
  ret <32 x half> %r
}
; Loads a <64 x half> (1024 bits) from %a and replaces lane 63 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_1024 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 63, merges 5.0 under it and stores the result to [x8].
define <64 x half> @insertelement_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: insertelement_v64f16:
; VBITS_GE_1024: ptrue p0.h, vl64
; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_1024-NEXT: mov w9, #63
; VBITS_GE_1024-NEXT: mov z1.h, w9
; VBITS_GE_1024-NEXT: index z2.h, #0, #1
; VBITS_GE_1024-NEXT: ptrue p1.h
; VBITS_GE_1024-NEXT: cmpeq p1.h, p1/z, z2.h, z1.h
; VBITS_GE_1024-NEXT: fmov h1, #5.00000000
; VBITS_GE_1024-NEXT: mov z0.h, p1/m, h1
; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <64 x half>, <64 x half>* %a
  %r = insertelement <64 x half> %op1, half 5.0, i64 63
  ret <64 x half> %r
}
; Loads a <128 x half> (2048 bits) from %a and replaces lane 127 with 5.0.
; The instruction sequence is only verified by the run that enables the
; VBITS_GE_2048 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 127, merges 5.0 under it and stores the result to [x8].
define <128 x half> @insertelement_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: insertelement_v128f16:
; VBITS_GE_2048: ptrue p0.h, vl128
; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_2048-NEXT: mov w9, #127
; VBITS_GE_2048-NEXT: mov z1.h, w9
; VBITS_GE_2048-NEXT: index z2.h, #0, #1
; VBITS_GE_2048-NEXT: ptrue p1.h
; VBITS_GE_2048-NEXT: cmpeq p1.h, p1/z, z2.h, z1.h
; VBITS_GE_2048-NEXT: fmov h1, #5.00000000
; VBITS_GE_2048-NEXT: mov z0.h, p1/m, h1
; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <128 x half>, <128 x half>* %a
  %r = insertelement <128 x half> %op1, half 5.0, i64 127
  ret <128 x half> %r
}
; Don't use SVE for 64-bit vectors.
; Replaces lane 1 of a <2 x float> argument with 5.0; the expected code is a
; scalar fmov followed by a NEON lane-to-lane mov.
define <2 x float> @insertelement_v2f32(<2 x float> %op1) #0 {
; CHECK-LABEL: insertelement_v2f32:
; CHECK: fmov s1, #5.00000000
; CHECK-NEXT: mov v0.s[1], v1.s[0]
; CHECK-NEXT: ret
  %r = insertelement <2 x float> %op1, float 5.0, i64 1
  ret <2 x float> %r
}
; Don't use SVE for 128-bit vectors.
; Replaces lane 3 of a <4 x float> argument with 5.0; the expected code is a
; scalar fmov followed by a NEON lane-to-lane mov.
define <4 x float> @insertelement_v4f32(<4 x float> %op1) #0 {
; CHECK-LABEL: insertelement_v4f32:
; CHECK: fmov s1, #5.00000000
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: ret
  %r = insertelement <4 x float> %op1, float 5.0, i64 3
  ret <4 x float> %r
}
; Loads a <8 x float> (256 bits) from %a and replaces lane 7 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_256 prefix. The expected code builds a one-lane predicate by
; comparing a lane-index vector with the splatted constant 7, merges 5.0
; under it and stores the result to [x8].
define <8 x float> @insertelement_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: insertelement_v8f32:
; VBITS_GE_256: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: mov w9, #7
; VBITS_GE_256-NEXT: mov z1.s, w9
; VBITS_GE_256-NEXT: index z2.s, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: cmpeq p1.s, p1/z, z2.s, z1.s
; VBITS_GE_256-NEXT: fmov s1, #5.00000000
; VBITS_GE_256-NEXT: mov z0.s, p1/m, s1
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_256-NEXT: ret
  %op1 = load <8 x float>, <8 x float>* %a
  %r = insertelement <8 x float> %op1, float 5.0, i64 7
  ret <8 x float> %r
}
; Loads a <16 x float> (512 bits) from %a and replaces lane 15 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_512 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 15, merges 5.0 under it and stores the result to [x8].
define <16 x float> @insertelement_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: insertelement_v16f32:
; VBITS_GE_512: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: mov w9, #15
; VBITS_GE_512-NEXT: mov z1.s, w9
; VBITS_GE_512-NEXT: index z2.s, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: cmpeq p1.s, p1/z, z2.s, z1.s
; VBITS_GE_512-NEXT: fmov s1, #5.00000000
; VBITS_GE_512-NEXT: mov z0.s, p1/m, s1
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x float>, <16 x float>* %a
  %r = insertelement <16 x float> %op1, float 5.0, i64 15
  ret <16 x float> %r
}
; Loads a <32 x float> (1024 bits) from %a and replaces lane 31 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_1024 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 31, merges 5.0 under it and stores the result to [x8].
define <32 x float> @insertelement_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: insertelement_v32f32:
; VBITS_GE_1024: ptrue p0.s, vl32
; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT: mov w9, #31
; VBITS_GE_1024-NEXT: mov z1.s, w9
; VBITS_GE_1024-NEXT: index z2.s, #0, #1
; VBITS_GE_1024-NEXT: ptrue p1.s
; VBITS_GE_1024-NEXT: cmpeq p1.s, p1/z, z2.s, z1.s
; VBITS_GE_1024-NEXT: fmov s1, #5.00000000
; VBITS_GE_1024-NEXT: mov z0.s, p1/m, s1
; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <32 x float>, <32 x float>* %a
  %r = insertelement <32 x float> %op1, float 5.0, i64 31
  ret <32 x float> %r
}
; Loads a <64 x float> (2048 bits) from %a and replaces lane 63 with 5.0.
; The instruction sequence is only verified by the run that enables the
; VBITS_GE_2048 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 63, merges 5.0 under it and stores the result to [x8].
define <64 x float> @insertelement_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: insertelement_v64f32:
; VBITS_GE_2048: ptrue p0.s, vl64
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: mov w9, #63
; VBITS_GE_2048-NEXT: mov z1.s, w9
; VBITS_GE_2048-NEXT: index z2.s, #0, #1
; VBITS_GE_2048-NEXT: ptrue p1.s
; VBITS_GE_2048-NEXT: cmpeq p1.s, p1/z, z2.s, z1.s
; VBITS_GE_2048-NEXT: fmov s1, #5.00000000
; VBITS_GE_2048-NEXT: mov z0.s, p1/m, s1
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <64 x float>, <64 x float>* %a
  %r = insertelement <64 x float> %op1, float 5.0, i64 63
  ret <64 x float> %r
}
; Don't use SVE for 64-bit vectors.
; A <1 x double> has only lane 0, so inserting 5.0 there replaces the whole
; value; the expected code simply materializes 5.0 in d0.
define <1 x double> @insertelement_v1f64(<1 x double> %op1) #0 {
; CHECK-LABEL: insertelement_v1f64:
; CHECK: fmov d0, #5.00000000
; CHECK-NEXT: ret
  %r = insertelement <1 x double> %op1, double 5.0, i64 0
  ret <1 x double> %r
}
; Don't use SVE for 128-bit vectors.
; Replaces lane 1 of a <2 x double> argument with 5.0; the expected code is a
; scalar fmov followed by a NEON lane-to-lane mov.
define <2 x double> @insertelement_v2f64(<2 x double> %op1) #0 {
; CHECK-LABEL: insertelement_v2f64:
; CHECK: fmov d1, #5.00000000
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
  %r = insertelement <2 x double> %op1, double 5.0, i64 1
  ret <2 x double> %r
}
; Loads a <4 x double> (256 bits) from %a and replaces lane 3 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_256 prefix. The expected code builds a one-lane predicate by
; comparing a lane-index vector with the splatted constant 3 (written through
; w9, splatted from x9), merges 5.0 under it and stores the result to [x8].
define <4 x double> @insertelement_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: insertelement_v4f64:
; VBITS_GE_256: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: mov w9, #3
; VBITS_GE_256-NEXT: mov z1.d, x9
; VBITS_GE_256-NEXT: index z2.d, #0, #1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: cmpeq p1.d, p1/z, z2.d, z1.d
; VBITS_GE_256-NEXT: fmov d1, #5.00000000
; VBITS_GE_256-NEXT: mov z0.d, p1/m, d1
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_256-NEXT: ret
  %op1 = load <4 x double>, <4 x double>* %a
  %r = insertelement <4 x double> %op1, double 5.0, i64 3
  ret <4 x double> %r
}
; Loads a <8 x double> (512 bits) from %a and replaces lane 7 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_512 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 7, merges 5.0 under it and stores the result to [x8].
define <8 x double> @insertelement_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: insertelement_v8f64:
; VBITS_GE_512: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: mov w9, #7
; VBITS_GE_512-NEXT: mov z1.d, x9
; VBITS_GE_512-NEXT: index z2.d, #0, #1
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: cmpeq p1.d, p1/z, z2.d, z1.d
; VBITS_GE_512-NEXT: fmov d1, #5.00000000
; VBITS_GE_512-NEXT: mov z0.d, p1/m, d1
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x double>, <8 x double>* %a
  %r = insertelement <8 x double> %op1, double 5.0, i64 7
  ret <8 x double> %r
}
; Loads a <16 x double> (1024 bits) from %a and replaces lane 15 with 5.0.
; The instruction sequence is only verified by runs that enable the
; VBITS_GE_1024 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 15, merges 5.0 under it and stores the result to [x8].
define <16 x double> @insertelement_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: insertelement_v16f64:
; VBITS_GE_1024: ptrue p0.d, vl16
; VBITS_GE_1024-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT: mov w9, #15
; VBITS_GE_1024-NEXT: mov z1.d, x9
; VBITS_GE_1024-NEXT: index z2.d, #0, #1
; VBITS_GE_1024-NEXT: ptrue p1.d
; VBITS_GE_1024-NEXT: cmpeq p1.d, p1/z, z2.d, z1.d
; VBITS_GE_1024-NEXT: fmov d1, #5.00000000
; VBITS_GE_1024-NEXT: mov z0.d, p1/m, d1
; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_1024-NEXT: ret
  %op1 = load <16 x double>, <16 x double>* %a
  %r = insertelement <16 x double> %op1, double 5.0, i64 15
  ret <16 x double> %r
}
; Loads a <32 x double> (2048 bits) from %a and replaces lane 31 with 5.0.
; The instruction sequence is only verified by the run that enables the
; VBITS_GE_2048 prefix; other runs check just the function label. The expected
; code builds a one-lane predicate by comparing a lane-index vector with the
; splatted constant 31, merges 5.0 under it and stores the result to [x8].
define <32 x double> @insertelement_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: insertelement_v32f64:
; VBITS_GE_2048: ptrue p0.d, vl32
; VBITS_GE_2048-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT: mov w9, #31
; VBITS_GE_2048-NEXT: mov z1.d, x9
; VBITS_GE_2048-NEXT: index z2.d, #0, #1
; VBITS_GE_2048-NEXT: ptrue p1.d
; VBITS_GE_2048-NEXT: cmpeq p1.d, p1/z, z2.d, z1.d
; VBITS_GE_2048-NEXT: fmov d1, #5.00000000
; VBITS_GE_2048-NEXT: mov z0.d, p1/m, d1
; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
  %op1 = load <32 x double>, <32 x double>* %a
  %r = insertelement <32 x double> %op1, double 5.0, i64 31
  ret <32 x double> %r
}
; Attribute group referenced by every function in this file: enables the SVE
; target feature for code generation.
attributes #0 = { "target-features"="+sve" }