; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare void @llvm.vp.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i16_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i16_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i16> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32_truncstore_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32_truncstore_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i16>
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i16>
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v3i16.v3p0(<3 x i16>, <3 x ptr>, <3 x i1>, i32)

define void @vpscatter_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v3i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v3i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v3i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v3i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v9, v9
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i32>
  call void @llvm.vp.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v11, v10
; RV32-NEXT:    vsll.vi v10, v11, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v11, v10
; RV64-NEXT:    vsll.vi v10, v11, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v10, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v10, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v13, v12
; RV32-NEXT:    vsll.vi v12, v13, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v13, v12
; RV64-NEXT:    vsll.vi v12, v13, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v16, v12, 0
; RV32-NEXT:    vsll.vi v12, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v12, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v9, v9
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v11, v10
; RV32-NEXT:    vsll.vi v10, v11, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v11, v10
; RV64-NEXT:    vsll.vi v10, v11, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v10, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v10, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v13, v12
; RV32-NEXT:    vsll.vi v12, v13, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v13, v12
; RV64-NEXT:    vsll.vi v12, v13, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}
1578 define void @vpscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1579 ; RV32-LABEL: vpscatter_baseidx_v8i32_v8f64:
1581 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1582 ; RV32-NEXT: vsll.vi v12, v12, 3
1583 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1584 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1587 ; RV64-LABEL: vpscatter_baseidx_v8i32_v8f64:
1589 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1590 ; RV64-NEXT: vsext.vf2 v16, v12
1591 ; RV64-NEXT: vsll.vi v12, v16, 3
1592 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1593 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1595 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
1596 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1600 define void @vpscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1601 ; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8f64:
1603 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1604 ; RV32-NEXT: vsll.vi v12, v12, 3
1605 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1606 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1609 ; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8f64:
1611 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1612 ; RV64-NEXT: vsext.vf2 v16, v12
1613 ; RV64-NEXT: vsll.vi v12, v16, 3
1614 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1615 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1617 %eidxs = sext <8 x i32> %idxs to <8 x i64>
1618 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1619 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
define void @vpscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

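; Native i64 indices: RV32 truncates the offsets with vnsrl.wi (only the low
; 32 bits matter in a 32-bit address space) and keeps vsoxei32; RV64 uses
; them as-is.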
define void @vpscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v16, v12, 0
; RV32-NEXT:    vsll.vi v12, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v12, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v32f64.v32p0(<32 x double>, <32 x ptr>, <32 x i1>, i32)

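; With the 128-bit minimum VLEN, an LMUL=8 group is only guaranteed to hold
; 16 doubles, so a 32-element scatter is split in two: the first store covers
; min(evl, 16) elements and the second covers max(evl - 16, 0), computed
; branchlessly via sltu/addi/and, with the mask slid down by two bytes
; (16 mask bits). On RV64 the second half of the pointers is spilled to the
; stack to free up registers.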
define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v24, (a0)
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:    bltu a1, a2, .LBB79_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a0, 16
; RV32-NEXT:  .LBB79_2:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT:    addi a0, a1, -16
; RV32-NEXT:    sltu a1, a1, a0
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a0, a1, a0
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    addi a1, a0, 128
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v24, (a1)
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vle64.v v24, (a0)
; RV64-NEXT:    li a1, 16
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    bltu a2, a1, .LBB79_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 16
; RV64-NEXT:  .LBB79_2:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    addi a0, a2, -16
; RV64-NEXT:    sltu a1, a2, a0
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a0, a1, a0
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}

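; Base + 32 i32 indices: RV32 scales all 32 offsets in a single e32 group.
; RV64 has to sign extend each 16-index half to i64 separately, spilling the
; second half of the data (v16-v23) and saving the mask in v7 to make room.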
define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a3, 32
; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v24, (a1)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    bltu a2, a3, .LBB80_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB80_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    addi a1, a2, -16
; RV32-NEXT:    sltu a2, a2, a1
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a1, a2, a1
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 3
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    li a3, 32
; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v24, (a1)
; RV64-NEXT:    vmv1r.v v7, v0
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v24, 16
; RV64-NEXT:    vmv4r.v v0, v24
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsext.vf2 v24, v0
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    bltu a2, a3, .LBB80_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB80_2:
; RV64-NEXT:    vmv1r.v v0, v7
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    addi a1, a2, -16
; RV64-NEXT:    sltu a2, a2, a1
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v7, 2
; RV64-NEXT:    and a1, a2, a1
; RV64-NEXT:    addi a2, sp, 16
; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}

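; The explicit sext variant widens both index halves before either store can
; issue, so on RV64 the mask in v0 is spilled to the stack as well, giving a
; 10 * vlenb spill area.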
define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a3, 32
; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v24, (a1)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    bltu a2, a3, .LBB81_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB81_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    addi a1, a2, -16
; RV32-NEXT:    sltu a2, a2, a1
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a1, a2, a1
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    li a4, 10
; RV64-NEXT:    mul a3, a3, a4
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
; RV64-NEXT:    li a3, 32
; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v24, (a1)
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v0, v24
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v24, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v24
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v24, v0, 3
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    bltu a2, a3, .LBB81_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB81_2:
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl1r.v v0, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    addi a1, a2, -16
; RV64-NEXT:    sltu a2, a2, a1
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a1, a2, a1
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    li a1, 10
; RV64-NEXT:    mul a0, a0, a1
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %eidxs = sext <32 x i32> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}

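; The zext variant mirrors the sext one; only the widening instruction
; changes to vzext.vf2.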
define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a3, 32
; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v24, (a1)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    bltu a2, a3, .LBB82_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB82_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    addi a1, a2, -16
; RV32-NEXT:    sltu a2, a2, a1
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a1, a2, a1
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    li a4, 10
; RV64-NEXT:    mul a3, a3, a4
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
; RV64-NEXT:    li a3, 32
; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v24, (a1)
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v0, v24
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v24, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v24
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v24, v0, 3
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    bltu a2, a3, .LBB82_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB82_2:
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl1r.v v0, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    addi a1, a2, -16
; RV64-NEXT:    sltu a2, a2, a1
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a1, a2, a1
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    li a1, 10
; RV64-NEXT:    mul a0, a0, a1
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %eidxs = zext <32 x i32> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}