; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
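; Lowering of llvm.masked.store for fixed-length integer vectors (i8, i16,
; i32 and i64 elements, 1 to 256 lanes). Each test loads a vector of the
; element type, builds the mask with an icmp-eq-zero against it, and then
; performs the masked store of a separately loaded value vector.
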
define void @masked_store_v1i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse8.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <1 x i8>, ptr %m_ptr
  %mask = icmp eq <1 x i8> %m, zeroinitializer
  %val = load <1 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v1i8.p0(<1 x i8> %val, ptr %a, i32 8, <1 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v1i8.p0(<1 x i8>, ptr, i32, <1 x i1>)

define void @masked_store_v1i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a2)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse16.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <1 x i16>, ptr %m_ptr
  %mask = icmp eq <1 x i16> %m, zeroinitializer
  %val = load <1 x i16>, ptr %val_ptr
  call void @llvm.masked.store.v1i16.p0(<1 x i16> %val, ptr %a, i32 8, <1 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v1i16.p0(<1 x i16>, ptr, i32, <1 x i1>)

define void @masked_store_v1i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse32.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <1 x i32>, ptr %m_ptr
  %mask = icmp eq <1 x i32> %m, zeroinitializer
  %val = load <1 x i32>, ptr %val_ptr
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %val, ptr %a, i32 8, <1 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>)

define void @masked_store_v1i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    vle64.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse64.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <1 x i64>, ptr %m_ptr
  %mask = icmp eq <1 x i64> %m, zeroinitializer
  %val = load <1 x i64>, ptr %val_ptr
  call void @llvm.masked.store.v1i64.p0(<1 x i64> %val, ptr %a, i32 8, <1 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>)

define void @masked_store_v2i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse8.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <2 x i8>, ptr %m_ptr
  %mask = icmp eq <2 x i8> %m, zeroinitializer
  %val = load <2 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v2i8.p0(<2 x i8> %val, ptr %a, i32 8, <2 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v2i8.p0(<2 x i8>, ptr, i32, <2 x i1>)

define void @masked_store_v2i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a2)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse16.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <2 x i16>, ptr %m_ptr
  %mask = icmp eq <2 x i16> %m, zeroinitializer
  %val = load <2 x i16>, ptr %val_ptr
  call void @llvm.masked.store.v2i16.p0(<2 x i16> %val, ptr %a, i32 8, <2 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v2i16.p0(<2 x i16>, ptr, i32, <2 x i1>)

define void @masked_store_v2i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse32.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <2 x i32>, ptr %m_ptr
  %mask = icmp eq <2 x i32> %m, zeroinitializer
  %val = load <2 x i32>, ptr %val_ptr
  call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %a, i32 8, <2 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)

define void @masked_store_v2i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    vle64.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse64.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <2 x i64>, ptr %m_ptr
  %mask = icmp eq <2 x i64> %m, zeroinitializer
  %val = load <2 x i64>, ptr %val_ptr
  call void @llvm.masked.store.v2i64.p0(<2 x i64> %val, ptr %a, i32 8, <2 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>)

define void @masked_store_v4i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse8.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <4 x i8>, ptr %m_ptr
  %mask = icmp eq <4 x i8> %m, zeroinitializer
  %val = load <4 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v4i8.p0(<4 x i8> %val, ptr %a, i32 8, <4 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32, <4 x i1>)

define void @masked_store_v4i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a2)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse16.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <4 x i16>, ptr %m_ptr
  %mask = icmp eq <4 x i16> %m, zeroinitializer
  %val = load <4 x i16>, ptr %val_ptr
  call void @llvm.masked.store.v4i16.p0(<4 x i16> %val, ptr %a, i32 8, <4 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>)

define void @masked_store_v4i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse32.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <4 x i32>, ptr %m_ptr
  %mask = icmp eq <4 x i32> %m, zeroinitializer
  %val = load <4 x i32>, ptr %val_ptr
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %a, i32 8, <4 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)

define void @masked_store_v4i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    vle64.v v10, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse64.v v10, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <4 x i64>, ptr %m_ptr
  %mask = icmp eq <4 x i64> %m, zeroinitializer
  %val = load <4 x i64>, ptr %val_ptr
  call void @llvm.masked.store.v4i64.p0(<4 x i64> %val, ptr %a, i32 8, <4 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>)

define void @masked_store_v8i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse8.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <8 x i8>, ptr %m_ptr
  %mask = icmp eq <8 x i8> %m, zeroinitializer
  %val = load <8 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %val, ptr %a, i32 8, <8 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>)

define void @masked_store_v8i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a2)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse16.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <8 x i16>, ptr %m_ptr
  %mask = icmp eq <8 x i16> %m, zeroinitializer
  %val = load <8 x i16>, ptr %val_ptr
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %val, ptr %a, i32 8, <8 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>)

define void @masked_store_v8i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse32.v v10, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <8 x i32>, ptr %m_ptr
  %mask = icmp eq <8 x i32> %m, zeroinitializer
  %val = load <8 x i32>, ptr %val_ptr
  call void @llvm.masked.store.v8i32.p0(<8 x i32> %val, ptr %a, i32 8, <8 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>)

define void @masked_store_v8i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    vle64.v v12, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse64.v v12, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <8 x i64>, ptr %m_ptr
  %mask = icmp eq <8 x i64> %m, zeroinitializer
  %val = load <8 x i64>, ptr %val_ptr
  call void @llvm.masked.store.v8i64.p0(<8 x i64> %val, ptr %a, i32 8, <8 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>)

define void @masked_store_v16i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse8.v v9, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <16 x i8>, ptr %m_ptr
  %mask = icmp eq <16 x i8> %m, zeroinitializer
  %val = load <16 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %val, ptr %a, i32 8, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>)

define void @masked_store_v16i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a2)
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse16.v v10, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <16 x i16>, ptr %m_ptr
  %mask = icmp eq <16 x i16> %m, zeroinitializer
  %val = load <16 x i16>, ptr %val_ptr
  call void @llvm.masked.store.v16i16.p0(<16 x i16> %val, ptr %a, i32 8, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>)

define void @masked_store_v16i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse32.v v12, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <16 x i32>, ptr %m_ptr
  %mask = icmp eq <16 x i32> %m, zeroinitializer
  %val = load <16 x i32>, ptr %val_ptr
  call void @llvm.masked.store.v16i32.p0(<16 x i32> %val, ptr %a, i32 8, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>)

define void @masked_store_v16i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    vle64.v v16, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse64.v v16, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <16 x i64>, ptr %m_ptr
  %mask = icmp eq <16 x i64> %m, zeroinitializer
  %val = load <16 x i64>, ptr %val_ptr
  call void @llvm.masked.store.v16i64.p0(<16 x i64> %val, ptr %a, i32 8, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16i64.p0(<16 x i64>, ptr, i32, <16 x i1>)

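; From 32 elements upward the AVL no longer fits vsetivli's 5-bit immediate
; (maximum 31), so the element count is materialized in a scalar register and
; vsetvli is used instead.
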
define void @masked_store_v32i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse8.v v10, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <32 x i8>, ptr %m_ptr
  %mask = icmp eq <32 x i8> %m, zeroinitializer
  %val = load <32 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v32i8.p0(<32 x i8> %val, ptr %a, i32 8, <32 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>)

define void @masked_store_v32i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a2)
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse16.v v12, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <32 x i16>, ptr %m_ptr
  %mask = icmp eq <32 x i16> %m, zeroinitializer
  %val = load <32 x i16>, ptr %val_ptr
  call void @llvm.masked.store.v32i16.p0(<32 x i16> %val, ptr %a, i32 8, <32 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>)

define void @masked_store_v32i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse32.v v16, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <32 x i32>, ptr %m_ptr
  %mask = icmp eq <32 x i32> %m, zeroinitializer
  %val = load <32 x i32>, ptr %val_ptr
  call void @llvm.masked.store.v32i32.p0(<32 x i32> %val, ptr %a, i32 8, <32 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v32i32.p0(<32 x i32>, ptr, i32, <32 x i1>)

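; Vectors whose total size exceeds 128 bytes (v32i64 here, and the widest
; cases below) do not fit a single LMUL=8 register group at the minimum VLEN,
; so they are split into two halves stored at offsets 0 and 128, with vector
; register spills and reloads around the second mask computation. For v32i64,
; RV32 materializes the zero vector with vmv.v.i and compares with vmseq.vv,
; while RV64 can use vmseq.vi directly.
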
define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; RV32-LABEL: masked_store_v32i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    sub sp, sp, a3
; RV32-NEXT:    addi a3, a2, 128
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v24, (a2)
; RV32-NEXT:    vle64.v v8, (a3)
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 16
; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vmseq.vv v7, v24, v8
; RV32-NEXT:    addi a2, a0, 128
; RV32-NEXT:    vle64.v v24, (a2)
; RV32-NEXT:    vle64.v v16, (a0)
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vmseq.vv v0, v16, v8
; RV32-NEXT:    addi a0, a1, 128
; RV32-NEXT:    vse64.v v24, (a0), v0.t
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vse64.v v8, (a1), v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: masked_store_v32i64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 4
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a2)
; RV64-NEXT:    addi a2, a2, 128
; RV64-NEXT:    vle64.v v16, (a2)
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT:    vmseq.vi v0, v8, 0
; RV64-NEXT:    vle64.v v24, (a0)
; RV64-NEXT:    addi a0, a0, 128
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add a0, sp, a0
; RV64-NEXT:    addi a0, a0, 16
; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vmseq.vi v8, v16, 0
; RV64-NEXT:    vse64.v v24, (a1), v0.t
; RV64-NEXT:    addi a0, a1, 128
; RV64-NEXT:    vmv1r.v v0, v8
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vse64.v v8, (a0), v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %m = load <32 x i64>, ptr %m_ptr
  %mask = icmp eq <32 x i64> %m, zeroinitializer
  %val = load <32 x i64>, ptr %val_ptr
  call void @llvm.masked.store.v32i64.p0(<32 x i64> %val, ptr %a, i32 8, <32 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v32i64.p0(<32 x i64>, ptr, i32, <32 x i1>)

define void @masked_store_v64i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 64
; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse8.v v12, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <64 x i8>, ptr %m_ptr
  %mask = icmp eq <64 x i8> %m, zeroinitializer
  %val = load <64 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v64i8.p0(<64 x i8> %val, ptr %a, i32 8, <64 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>)

define void @masked_store_v64i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 64
; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a2)
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse16.v v16, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <64 x i16>, ptr %m_ptr
  %mask = icmp eq <64 x i16> %m, zeroinitializer
  %val = load <64 x i16>, ptr %val_ptr
  call void @llvm.masked.store.v64i16.p0(<64 x i16> %val, ptr %a, i32 8, <64 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v64i16.p0(<64 x i16>, ptr, i32, <64 x i1>)

define void @masked_store_v64i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 4
; CHECK-NEXT:    sub sp, sp, a3
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a2)
; CHECK-NEXT:    addi a2, a2, 128
; CHECK-NEXT:    vle32.v v16, (a2)
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 3
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmseq.vi v8, v16, 0
; CHECK-NEXT:    vse32.v v24, (a1), v0.t
; CHECK-NEXT:    addi a0, a1, 128
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %m = load <64 x i32>, ptr %m_ptr
  %mask = icmp eq <64 x i32> %m, zeroinitializer
  %val = load <64 x i32>, ptr %val_ptr
  call void @llvm.masked.store.v64i32.p0(<64 x i32> %val, ptr %a, i32 8, <64 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v64i32.p0(<64 x i32>, ptr, i32, <64 x i1>)

define void @masked_store_v128i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v128i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 128
; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vse8.v v16, (a1), v0.t
; CHECK-NEXT:    ret
  %m = load <128 x i8>, ptr %m_ptr
  %mask = icmp eq <128 x i8> %m, zeroinitializer
  %val = load <128 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v128i8.p0(<128 x i8> %val, ptr %a, i32 8, <128 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v128i8.p0(<128 x i8>, ptr, i32, <128 x i1>)

define void @masked_store_v128i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 4
; CHECK-NEXT:    sub sp, sp, a3
; CHECK-NEXT:    li a3, 64
; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a2)
; CHECK-NEXT:    addi a2, a2, 128
; CHECK-NEXT:    vle16.v v16, (a2)
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 3
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vle16.v v24, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmseq.vi v8, v16, 0
; CHECK-NEXT:    vse16.v v24, (a1), v0.t
; CHECK-NEXT:    addi a0, a1, 128
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %m = load <128 x i16>, ptr %m_ptr
  %mask = icmp eq <128 x i16> %m, zeroinitializer
  %val = load <128 x i16>, ptr %val_ptr
  call void @llvm.masked.store.v128i16.p0(<128 x i16> %val, ptr %a, i32 8, <128 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v128i16.p0(<128 x i16>, ptr, i32, <128 x i1>)

define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
; CHECK-LABEL: masked_store_v256i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 4
; CHECK-NEXT:    sub sp, sp, a3
; CHECK-NEXT:    li a3, 128
; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a2)
; CHECK-NEXT:    addi a2, a2, 128
; CHECK-NEXT:    vle8.v v16, (a2)
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 3
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    vle8.v v24, (a0)
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vmseq.vi v8, v16, 0
; CHECK-NEXT:    vse8.v v24, (a1), v0.t
; CHECK-NEXT:    addi a0, a1, 128
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vse8.v v8, (a0), v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %m = load <256 x i8>, ptr %m_ptr
  %mask = icmp eq <256 x i8> %m, zeroinitializer
  %val = load <256 x i8>, ptr %val_ptr
  call void @llvm.masked.store.v256i8.p0(<256 x i8> %val, ptr %a, i32 8, <256 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v256i8.p0(<256 x i8>, ptr, i32, <256 x i1>)