; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
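
; vp.scatter lowers to an indexed, ordered store: vsoxei32 on RV32, where
; pointers are 32 bits wide, and vsoxei64 on RV64. The EVL argument (a0/a1)
; becomes the AVL operand of the vsetvli that precedes each store.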

declare void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}
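
; The truncstore variants narrow the stored value first, one vnsrl.wi per
; SEW-halving step, before issuing the indexed store.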

define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
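
; The baseidx variants extend the index vector to pointer width and scale it
; by the element size (vadd.vv for a factor of two, vsll.vi for wider
; elements) before the scatter.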

define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
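
; A zero-extended index lets the scatter use a narrower index EEW: vwaddu.vv
; produces e16 indices and the store becomes vsoxei16 on both targets.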

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)

define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10, v0.t
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10, v0.t
; RV64-NEXT:    vwadd.vv v16, v12, v12
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)

define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10, v0.t
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10, v0.t
; RV64-NEXT:    vwadd.vv v16, v12, v12
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)

define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v16, 0
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vwadd.vv v16, v12, v12, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)

define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v16, 0
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vwaddu.vv v16, v12, v12, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 2
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 2
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v18, v16
; RV32-NEXT:    vsll.vi v16, v18, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v18, v16
; RV64-NEXT:    vsll.vi v16, v18, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v20, v16
; RV64-NEXT:    vsll.vi v16, v20, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
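
; With native i64 indices, RV32 first narrows them to 32 bits with vnsrl.wi,
; which is sufficient for 32-bit address arithmetic.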
define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1bf16(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2bf16(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8bf16(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1439 declare void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
1441 define void @vpscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1442 ; RV32-LABEL: vpscatter_nxv1f32:
1444 ; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1445 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1448 ; RV64-LABEL: vpscatter_nxv1f32:
1450 ; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1451 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1453 call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
declare void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 2
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v14, v12
; RV64-NEXT: vsll.vi v12, v14, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 2
; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double>, <vscale x 6 x ptr>, <vscale x 6 x i1>, i32)

define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v18, v16
; RV32-NEXT: vsll.vi v16, v18, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v18, v16
; RV64-NEXT: vsll.vi v16, v18, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v20, v16
; RV64-NEXT: vsll.vi v16, v20, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v18, v16
; RV32-NEXT: vsll.vi v16, v18, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v18, v16
; RV64-NEXT: vsll.vi v16, v18, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v20, v16
; RV64-NEXT: vsll.vi v16, v20, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, <vscale x 16 x i1>, i32)

define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl8re32.v v24, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: mv a2, a1
; RV32-NEXT: bltu a1, a0, .LBB108_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a2, a0
; RV32-NEXT: .LBB108_2:
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT: sub a2, a1, a0
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: sltu a1, a1, a2
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a0
; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a3, a1, 3
; RV64-NEXT: add a3, a0, a3
; RV64-NEXT: vl8re64.v v16, (a3)
; RV64-NEXT: vl8re64.v v24, (a0)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: bltu a2, a1, .LBB108_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB108_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT: sub a0, a2, a1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a0
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB109_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB109_2:
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vsext.vf4 v16, v24
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB109_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB109_2:
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB110_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB110_2:
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vsext.vf4 v16, v24
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB110_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB110_2:
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vzext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB111_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB111_2:
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV64-NEXT: vzext.vf2 v0, v24
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB111_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB111_2:
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}