1 ; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK
3 ; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK
4 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
5 ; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
6 ; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
7 ; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_2048,VBITS_GE_1024,VBITS_GE_512
18 target triple = "aarch64-unknown-linux-gnu"
20 ; Don't use SVE when its registers are no bigger than NEON.
; Masked store of <2 x half>. The mask is the lane-wise `fcmp oeq` of the two
; loaded vectors and %a is stored back through %bp under that mask.
; The expected code builds the mask on NEON registers (fcmeq, lane moves and
; shl/sshr pairs), converts it to an SVE predicate with cmpne under a vl4
; ptrue, and stores with st1h.
26 define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 {
27 ; CHECK-LABEL: masked_store_v2f16:
29 ; CHECK-NEXT: ldr s1, [x1]
30 ; CHECK-NEXT: movi [[D0:d[0-9]+]], #0000000000000000
31 ; CHECK-NEXT: ptrue p[[P0:[0-9]+]].h, vl4
32 ; CHECK-NEXT: fcmeq v[[P1:[0-9]+]].4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
33 ; CHECK-NEXT: umov [[W0:w[0-9]+]], v[[P1]].h[0]
34 ; CHECK-NEXT: umov [[W1:w[0-9]+]], v[[P1]].h[1]
35 ; CHECK-NEXT: fmov s[[V0:[0-9]+]], [[W0]]
36 ; CHECK-NEXT: mov v[[V0]].s[1], [[W1]]
37 ; CHECK-NEXT: shl v[[V0]].2s, v[[V0]].2s, #16
38 ; CHECK-NEXT: sshr v[[V0]].2s, v[[V0]].2s, #16
39 ; CHECK-NEXT: fmov [[W1]], s[[V0]]
40 ; CHECK-NEXT: mov [[W0]], v[[V0]].s[1]
41 ; CHECK-NEXT: mov [[V1:v[0-9]+]].h[0], [[W1]]
42 ; CHECK-NEXT: mov [[V1]].h[1], [[W0]]
43 ; CHECK-NEXT: shl v[[V0]].4h, [[V1]].4h, #15
44 ; CHECK-NEXT: sshr v[[V0]].4h, v[[V0]].4h, #15
45 ; CHECK-NEXT: cmpne p[[P2:[0-9]+]].h, p[[P0]]/z, z[[P1]].h, #0
46 ; CHECK-NEXT: st1h { z0.h }, p[[P2]], [x{{[0-9]+}}]
48 %a = load <2 x half>, <2 x half>* %ap
49 %b = load <2 x half>, <2 x half>* %bp
50 %mask = fcmp oeq <2 x half> %a, %b
51 call void @llvm.masked.store.v2f16(<2 x half> %a, <2 x half>* %bp, i32 8, <2 x i1> %mask)
; Masked store of <2 x float>. The mask is the lane-wise `fcmp oeq` of the two
; loaded vectors and %a is stored back through %bp under that mask.
; The expected code compares with NEON fcmeq, turns the result into an SVE
; predicate with cmpne under a vl2 ptrue, and stores with st1w.
56 define void @masked_store_v2f32(<2 x float>* %ap, <2 x float>* %bp) #0 {
57 ; CHECK-LABEL: masked_store_v2f32:
59 ; CHECK-NEXT: ldr d1, [x1]
60 ; CHECK-NEXT: ptrue p[[P0:[0-9]+]].s, vl2
61 ; CHECK-NEXT: fcmeq v[[P1:[0-9]+]].2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
62 ; CHECK-NEXT: cmpne p[[P2:[0-9]+]].s, p[[P0]]/z, z[[P1]].s, #0
63 ; CHECK-NEXT: st1w { z0.s }, p[[P2]], [x{{[0-9]+}}]
65 %a = load <2 x float>, <2 x float>* %ap
66 %b = load <2 x float>, <2 x float>* %bp
67 %mask = fcmp oeq <2 x float> %a, %b
68 call void @llvm.masked.store.v2f32(<2 x float> %a, <2 x float>* %bp, i32 8, <2 x i1> %mask)
; Masked store of <4 x float>. The mask is the lane-wise `fcmp oeq` of the two
; loaded vectors and %a is stored back through %bp under that mask.
; The expected code compares with NEON fcmeq, turns the result into an SVE
; predicate with cmpne under a vl4 ptrue, and stores with st1w.
72 define void @masked_store_v4f32(<4 x float>* %ap, <4 x float>* %bp) #0 {
73 ; CHECK-LABEL: masked_store_v4f32:
75 ; CHECK-NEXT: ldr q1, [x1]
76 ; CHECK-NEXT: ptrue p[[P0:[0-9]+]].s, vl4
77 ; CHECK-NEXT: fcmeq v[[P1:[0-9]+]].4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
78 ; CHECK-NEXT: cmpne p[[P2:[0-9]+]].s, p[[P0]]/z, z[[P1]].s, #0
79 ; CHECK-NEXT: st1w { z0.s }, p[[P2]], [x{{[0-9]+}}]
81 %a = load <4 x float>, <4 x float>* %ap
82 %b = load <4 x float>, <4 x float>* %bp
83 %mask = fcmp oeq <4 x float> %a, %b
84 call void @llvm.masked.store.v4f32(<4 x float> %a, <4 x float>* %bp, i32 8, <4 x i1> %mask)
; Masked store of <8 x float>. The mask is the lane-wise `fcmp oeq` of the two
; loaded vectors and %a is stored back through %bp under that mask.
; The expected code is pure SVE: a ptrue whose element count is
; min(VBYTES/4, 8), two ld1w loads, an fcmeq that writes the mask predicate
; directly, and an st1w governed by that predicate.
88 define void @masked_store_v8f32(<8 x float>* %ap, <8 x float>* %bp) #0 {
89 ; CHECK-LABEL: masked_store_v8f32:
90 ; CHECK: ptrue [[PG0:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
91 ; CHECK-NEXT: ld1w { [[Z0:z[0-9]+]].s }, [[PG0]]/z, [x0]
92 ; CHECK-NEXT: ld1w { [[Z1:z[0-9]+]].s }, [[PG0]]/z, [x1]
93 ; CHECK-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s
94 ; CHECK-NEXT: st1w { z0.s }, [[PG1]], [x{{[0-9]+}}]
96 %a = load <8 x float>, <8 x float>* %ap
97 %b = load <8 x float>, <8 x float>* %bp
98 %mask = fcmp oeq <8 x float> %a, %b
99 call void @llvm.masked.store.v8f32(<8 x float> %a, <8 x float>* %bp, i32 8, <8 x i1> %mask)
; Masked store of <16 x float>. The mask is the lane-wise `fcmp oeq` of the two
; loaded vectors and %a is stored back through %ap under that mask.
; The instruction sequence is only verified by the runs that enable the
; VBITS_GE_512 prefix. The ptrue result is captured as PG0 in this function so
; that the ld1w and fcmeq lines refer to a predicate defined here rather than
; to a capture left over from an earlier test.
103 define void @masked_store_v16f32(<16 x float>* %ap, <16 x float>* %bp) #0 {
104 ; CHECK-LABEL: masked_store_v16f32:
105 ; VBITS_GE_512: ptrue [[PG0:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
106 ; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, [[PG0]]/z, [x0]
107 ; VBITS_GE_512-NEXT: ld1w { [[Z1:z[0-9]+]].s }, [[PG0]]/z, [x1]
108 ; VBITS_GE_512-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s
109 ; VBITS_GE_512-NEXT: st1w { z0.s }, [[PG1]], [x{{[0-9]+}}]
110 ; VBITS_GE_512-NEXT: ret
111 %a = load <16 x float>, <16 x float>* %ap
112 %b = load <16 x float>, <16 x float>* %bp
113 %mask = fcmp oeq <16 x float> %a, %b
114 call void @llvm.masked.store.v16f32(<16 x float> %a, <16 x float>* %ap, i32 8, <16 x i1> %mask)
; Masked store of <32 x float>. The mask is the lane-wise `fcmp oeq` of the two
; loaded vectors and %a is stored back through %ap under that mask.
; The instruction sequence is only verified by the runs that enable the
; VBITS_GE_1024 prefix. The ptrue result is captured as PG0 in this function so
; that the ld1w and fcmeq lines refer to a predicate defined here rather than
; to a capture left over from an earlier test.
118 define void @masked_store_v32f32(<32 x float>* %ap, <32 x float>* %bp) #0 {
119 ; CHECK-LABEL: masked_store_v32f32:
120 ; VBITS_GE_1024: ptrue [[PG0:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
121 ; VBITS_GE_1024-NEXT: ld1w { [[Z0:z[0-9]+]].s }, [[PG0]]/z, [x0]
122 ; VBITS_GE_1024-NEXT: ld1w { [[Z1:z[0-9]+]].s }, [[PG0]]/z, [x1]
123 ; VBITS_GE_1024-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s
124 ; VBITS_GE_1024-NEXT: st1w { z0.s }, [[PG1]], [x{{[0-9]+}}]
125 ; VBITS_GE_1024-NEXT: ret
126 %a = load <32 x float>, <32 x float>* %ap
127 %b = load <32 x float>, <32 x float>* %bp
128 %mask = fcmp oeq <32 x float> %a, %b
129 call void @llvm.masked.store.v32f32(<32 x float> %a, <32 x float>* %ap, i32 8, <32 x i1> %mask)
; Masked store of <64 x float>. The mask is the lane-wise `fcmp oeq` of the two
; loaded vectors and %a is stored back through %ap under that mask.
; The instruction sequence is only verified by the run that enables the
; VBITS_GE_2048 prefix. The ptrue result is captured as PG0 in this function so
; that the ld1w and fcmeq lines refer to a predicate defined here rather than
; to a capture left over from an earlier test.
133 define void @masked_store_v64f32(<64 x float>* %ap, <64 x float>* %bp) #0 {
134 ; CHECK-LABEL: masked_store_v64f32:
135 ; VBITS_GE_2048: ptrue [[PG0:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
136 ; VBITS_GE_2048-NEXT: ld1w { [[Z0:z[0-9]+]].s }, [[PG0]]/z, [x0]
137 ; VBITS_GE_2048-NEXT: ld1w { [[Z1:z[0-9]+]].s }, [[PG0]]/z, [x1]
138 ; VBITS_GE_2048-NEXT: fcmeq [[PG1:p[0-9]+]].s, [[PG0]]/z, [[Z0]].s, [[Z1]].s
139 ; VBITS_GE_2048-NEXT: st1w { z0.s }, [[PG1]], [x{{[0-9]+}}]
140 ; VBITS_GE_2048-NEXT: ret
141 %a = load <64 x float>, <64 x float>* %ap
142 %b = load <64 x float>, <64 x float>* %bp
143 %mask = fcmp oeq <64 x float> %a, %b
144 call void @llvm.masked.store.v64f32(<64 x float> %a, <64 x float>* %ap, i32 8, <64 x i1> %mask)
; Truncating masked store: two <8 x i64> vectors are compared for equality to
; form the mask, %a is truncated to <8 x i8>, and the result is stored to %dest
; under the mask.
; The expected code narrows both the data and the mask with uzp1 steps and
; rebuilds a byte predicate with cmpne before the st1b. The loads are tied to
; the captured ptrue register instead of a hard-coded p0.
148 define void @masked_store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i8>* %dest) #0 {
149 ; CHECK-LABEL: masked_store_trunc_v8i64i8:
150 ; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
151 ; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p[[P0]]/z, [x0]
152 ; VBITS_GE_512-NEXT: ld1d { [[Z1:z[0-9]+]].d }, p[[P0]]/z, [x1]
153 ; VBITS_GE_512-NEXT: cmpeq p[[P1:[0-9]+]].d, p[[P0]]/z, [[Z0]].d, [[Z1]].d
154 ; VBITS_GE_512-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
155 ; VBITS_GE_512-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
156 ; VBITS_GE_512-DAG: uzp1 [[Z1]].b, [[Z1]].b, [[Z1]].b
157 ; VBITS_GE_512-DAG: cmpne p[[P2:[0-9]+]].b, p{{[0-9]+}}/z, [[Z1]].b, #0
158 ; VBITS_GE_512-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
159 ; VBITS_GE_512-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
160 ; VBITS_GE_512-DAG: uzp1 [[Z0]].b, [[Z0]].b, [[Z0]].b
161 ; VBITS_GE_512-NEXT: st1b { [[Z0]].b }, p[[P2]], [x{{[0-9]+}}]
162 ; VBITS_GE_512-NEXT: ret
163 %a = load <8 x i64>, <8 x i64>* %ap
164 %b = load <8 x i64>, <8 x i64>* %bp
165 %mask = icmp eq <8 x i64> %a, %b
166 %val = trunc <8 x i64> %a to <8 x i8>
167 call void @llvm.masked.store.v8i8(<8 x i8> %val, <8 x i8>* %dest, i32 8, <8 x i1> %mask)
; Truncating masked store: two <8 x i64> vectors are compared for equality to
; form the mask, %a is truncated to <8 x i16>, and the result is stored to
; %dest under the mask.
; The expected code materialises the compare result as a vector (mov #-1 under
; the cmpeq predicate), narrows data and mask with uzp1 steps, and rebuilds a
; halfword predicate with cmpne before the st1h. The loads use the captured
; ptrue register and the mov uses the captured cmpeq result.
171 define void @masked_store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i16>* %dest) #0 {
172 ; CHECK-LABEL: masked_store_trunc_v8i64i16:
173 ; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
174 ; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p[[P0]]/z, [x0]
175 ; VBITS_GE_512-NEXT: ld1d { [[Z1:z[0-9]+]].d }, p[[P0]]/z, [x1]
176 ; VBITS_GE_512-DAG: ptrue p{{[0-9]+}}.h, vl8
177 ; VBITS_GE_512-DAG: cmpeq p[[P1:[0-9]+]].d, p[[P0]]/z, [[Z0]].d, [[Z1]].d
178 ; VBITS_GE_512-NEXT: mov [[Z1]].d, p[[P1]]/z, #-1
179 ; VBITS_GE_512-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
180 ; VBITS_GE_512-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
181 ; VBITS_GE_512-DAG: cmpne p[[P2:[0-9]+]].h, p{{[0-9]+}}/z, [[Z1]].h, #0
182 ; VBITS_GE_512-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
183 ; VBITS_GE_512-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
184 ; VBITS_GE_512-NEXT: st1h { [[Z0]].h }, p[[P2]], [x{{[0-9]+}}]
185 ; VBITS_GE_512-NEXT: ret
186 %a = load <8 x i64>, <8 x i64>* %ap
187 %b = load <8 x i64>, <8 x i64>* %bp
188 %mask = icmp eq <8 x i64> %a, %b
189 %val = trunc <8 x i64> %a to <8 x i16>
190 call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %dest, i32 8, <8 x i1> %mask)
; Truncating masked store: two <8 x i64> vectors are compared for equality to
; form the mask, %a is truncated to <8 x i32>, and the result is stored to
; %dest under the mask.
; The expected code materialises the compare result as a vector (mov #-1 under
; the cmpeq predicate), narrows data and mask with one uzp1 step each, and
; rebuilds a word predicate with cmpne before the st1w. The loads use the
; captured ptrue register and the mov uses the captured cmpeq result.
194 define void @masked_store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i32>* %dest) #0 {
195 ; CHECK-LABEL: masked_store_trunc_v8i64i32:
196 ; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
197 ; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p[[P0]]/z, [x0]
198 ; VBITS_GE_512-NEXT: ld1d { [[Z1:z[0-9]+]].d }, p[[P0]]/z, [x1]
199 ; VBITS_GE_512-DAG: ptrue p{{[0-9]+}}.s, vl8
200 ; VBITS_GE_512-DAG: cmpeq p[[P1:[0-9]+]].d, p[[P0]]/z, [[Z0]].d, [[Z1]].d
201 ; VBITS_GE_512-NEXT: mov [[Z1]].d, p[[P1]]/z, #-1
202 ; VBITS_GE_512-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
203 ; VBITS_GE_512-DAG: cmpne p[[P2:[0-9]+]].s, p{{[0-9]+}}/z, [[Z1]].s, #0
204 ; VBITS_GE_512-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
205 ; VBITS_GE_512-NEXT: st1w { [[Z0]].s }, p[[P2]], [x{{[0-9]+}}]
206 ; VBITS_GE_512-NEXT: ret
207 %a = load <8 x i64>, <8 x i64>* %ap
208 %b = load <8 x i64>, <8 x i64>* %bp
209 %mask = icmp eq <8 x i64> %a, %b
210 %val = trunc <8 x i64> %a to <8 x i32>
211 call void @llvm.masked.store.v8i32(<8 x i32> %val, <8 x i32>* %dest, i32 8, <8 x i1> %mask)
; Truncating masked store: two <16 x i32> vectors are compared for equality to
; form the mask, %a is truncated to <16 x i8>, and the result is stored to
; %dest under the mask.
; The expected code materialises the compare result as a vector (mov #-1 under
; the cmpeq predicate), narrows data and mask with uzp1 steps, and rebuilds a
; byte predicate with cmpne before the st1b. The loads use the captured ptrue
; register and the mov uses the captured cmpeq result.
215 define void @masked_store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i32>* %bp, <16 x i8>* %dest) #0 {
216 ; CHECK-LABEL: masked_store_trunc_v16i32i8:
217 ; VBITS_GE_512: ptrue p[[P0:[0-9]+]].s, vl16
218 ; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p[[P0]]/z, [x0]
219 ; VBITS_GE_512-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p[[P0]]/z, [x1]
220 ; VBITS_GE_512-DAG: ptrue p{{[0-9]+}}.b, vl16
221 ; VBITS_GE_512-DAG: cmpeq p[[P1:[0-9]+]].s, p[[P0]]/z, [[Z0]].s, [[Z1]].s
222 ; VBITS_GE_512-NEXT: mov [[Z1]].s, p[[P1]]/z, #-1
223 ; VBITS_GE_512-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
224 ; VBITS_GE_512-DAG: uzp1 [[Z1]].b, [[Z1]].b, [[Z1]].b
225 ; VBITS_GE_512-DAG: cmpne p[[P2:[0-9]+]].b, p{{[0-9]+}}/z, [[Z1]].b, #0
226 ; VBITS_GE_512-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
227 ; VBITS_GE_512-DAG: uzp1 [[Z0]].b, [[Z0]].b, [[Z0]].b
228 ; VBITS_GE_512-NEXT: st1b { [[Z0]].b }, p[[P2]], [x{{[0-9]+}}]
229 ; VBITS_GE_512-NEXT: ret
230 %a = load <16 x i32>, <16 x i32>* %ap
231 %b = load <16 x i32>, <16 x i32>* %bp
232 %mask = icmp eq <16 x i32> %a, %b
233 %val = trunc <16 x i32> %a to <16 x i8>
234 call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %dest, i32 8, <16 x i1> %mask)
; Truncating masked store: two <16 x i32> vectors are compared for equality to
; form the mask, %a is truncated to <16 x i16>, and the result is stored to
; %dest under the mask.
; The expected code materialises the compare result as a vector (mov #-1 under
; the cmpeq predicate), narrows data and mask with one uzp1 step each, and
; rebuilds a halfword predicate with cmpne before the st1h. The loads use the
; captured ptrue register and the mov uses the captured cmpeq result.
238 define void @masked_store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i32>* %bp, <16 x i16>* %dest) #0 {
239 ; CHECK-LABEL: masked_store_trunc_v16i32i16:
240 ; VBITS_GE_512: ptrue p[[P0:[0-9]+]].s, vl16
241 ; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p[[P0]]/z, [x0]
242 ; VBITS_GE_512-NEXT: ld1w { [[Z1:z[0-9]+]].s }, p[[P0]]/z, [x1]
243 ; VBITS_GE_512-DAG: ptrue p{{[0-9]+}}.h, vl16
244 ; VBITS_GE_512-DAG: cmpeq p[[P1:[0-9]+]].s, p[[P0]]/z, [[Z0]].s, [[Z1]].s
245 ; VBITS_GE_512-NEXT: mov [[Z1]].s, p[[P1]]/z, #-1
246 ; VBITS_GE_512-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
247 ; VBITS_GE_512-DAG: cmpne p[[P2:[0-9]+]].h, p{{[0-9]+}}/z, [[Z1]].h, #0
248 ; VBITS_GE_512-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
249 ; VBITS_GE_512-NEXT: st1h { [[Z0]].h }, p[[P2]], [x{{[0-9]+}}]
250 ; VBITS_GE_512-NEXT: ret
251 %a = load <16 x i32>, <16 x i32>* %ap
252 %b = load <16 x i32>, <16 x i32>* %bp
253 %mask = icmp eq <16 x i32> %a, %b
254 %val = trunc <16 x i32> %a to <16 x i16>
255 call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %dest, i32 8, <16 x i1> %mask)
; Truncating masked store: two <32 x i16> vectors are compared for equality to
; form the mask, %a is truncated to <32 x i8>, and the result is stored to
; %dest under the mask.
; The expected code materialises the compare result as a vector (mov #-1 under
; the cmpeq predicate), narrows data and mask with one uzp1 step each, and
; rebuilds a byte predicate with cmpne before the st1b. The loads use the
; captured ptrue register and the mov uses the captured cmpeq result.
259 define void @masked_store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i16>* %bp, <32 x i8>* %dest) #0 {
260 ; CHECK-LABEL: masked_store_trunc_v32i16i8:
261 ; VBITS_GE_512: ptrue p[[P0:[0-9]+]].h, vl32
262 ; VBITS_GE_512-NEXT: ld1h { [[Z0:z[0-9]+]].h }, p[[P0]]/z, [x0]
263 ; VBITS_GE_512-NEXT: ld1h { [[Z1:z[0-9]+]].h }, p[[P0]]/z, [x1]
264 ; VBITS_GE_512-DAG: ptrue p{{[0-9]+}}.b, vl32
265 ; VBITS_GE_512-DAG: cmpeq p[[P1:[0-9]+]].h, p[[P0]]/z, [[Z0]].h, [[Z1]].h
266 ; VBITS_GE_512-NEXT: mov [[Z1]].h, p[[P1]]/z, #-1
267 ; VBITS_GE_512-DAG: uzp1 [[Z1]].b, [[Z1]].b, [[Z1]].b
268 ; VBITS_GE_512-DAG: cmpne p[[P2:[0-9]+]].b, p{{[0-9]+}}/z, [[Z1]].b, #0
269 ; VBITS_GE_512-DAG: uzp1 [[Z0]].b, [[Z0]].b, [[Z0]].b
270 ; VBITS_GE_512-NEXT: st1b { [[Z0]].b }, p[[P2]], [x{{[0-9]+}}]
271 ; VBITS_GE_512-NEXT: ret
272 %a = load <32 x i16>, <32 x i16>* %ap
273 %b = load <32 x i16>, <32 x i16>* %bp
274 %mask = icmp eq <32 x i16> %a, %b
275 %val = trunc <32 x i16> %a to <32 x i8>
276 call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %dest, i32 8, <32 x i1> %mask)
; Declarations of the llvm.masked.store overloads called by the tests above.
; Operands are: value to store, destination pointer, i32 alignment, lane mask.
; Floating-point element types, used by the plain masked-store tests.
280 declare void @llvm.masked.store.v2f16(<2 x half>, <2 x half>*, i32, <2 x i1>)
281 declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
282 declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
283 declare void @llvm.masked.store.v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>)
284 declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
285 declare void @llvm.masked.store.v32f32(<32 x float>, <32 x float>*, i32, <32 x i1>)
286 declare void @llvm.masked.store.v64f32(<64 x float>, <64 x float>*, i32, <64 x i1>)
; Integer element types, used by the truncating masked-store tests.
288 declare void @llvm.masked.store.v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
289 declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
290 declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
291 declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
292 declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
293 declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
; Attribute group #0, attached to the test functions above, enables SVE.
295 attributes #0 = { "target-features"="+sve" }