; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
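; Check that @llvm.vp.scatter.* lowers to an indexed, unordered store
; (vsoxei<eew>.v) whose vector length is taken from the %evl argument.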
declare void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}
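; Truncating scatters: the stored value is narrowed one power of two at a
; time with vnsrl.wi before the indexed store.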
define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}
declare void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
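; The baseidx tests scatter to %base plus a vector of indices. The indices
; are extended to a legal index EEW (or kept narrow for zero-extension) and
; scaled to byte offsets with vadd.vv/vwadd[u].vv or vsll.vi.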
define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
declare void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}
declare void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
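; Indices produced by the masked vp.sext/vp.zext intrinsics; where possible
; the extension is folded into a masked widening add (vwadd.vv/vwaddu.vv).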
declare <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10, v0.t
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10, v0.t
; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT:    vwadd.vv v16, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10, v0.t
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10, v0.t
; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT:    vwadd.vv v16, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i16(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v16, 0
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vwadd.vv v16, v12, v12, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i64.nxv8i32(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12, v0.t
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v16, 0
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vwaddu.vv v16, v12, v12, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i64.nxv8i32(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
declare void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}
declare void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
declare void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v18, v16
; RV32-NEXT:    vsll.vi v16, v18, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v18, v16
; RV64-NEXT:    vsll.vi v16, v18, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v20, v16
; RV64-NEXT:    vsll.vi v16, v20, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
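; The floating-point scatters below mirror the integer tests; only the stored
; element type differs, so the index and store sequences are the same.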
declare void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
declare void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1376 define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1377 ; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
1379 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1380 ; RV32-NEXT: vsext.vf4 v16, v12
1381 ; RV32-NEXT: vsll.vi v12, v16, 2
1382 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
1383 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1386 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
1388 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
1389 ; RV64-NEXT: vsext.vf8 v16, v12
1390 ; RV64-NEXT: vsll.vi v16, v16, 2
1391 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
1392 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1394 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
1395 call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1399 define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1400 ; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
1402 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
1403 ; RV32-NEXT: vsext.vf4 v16, v12
1404 ; RV32-NEXT: vsll.vi v12, v16, 2
1405 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
1406 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1409 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
1411 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
1412 ; RV64-NEXT: vsext.vf8 v16, v12
1413 ; RV64-NEXT: vsll.vi v16, v16, 2
1414 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
1415 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1417 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
1418 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1419 call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1423 define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1424 ; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
1426 ; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
1427 ; RV32-NEXT: vzext.vf2 v14, v12
1428 ; RV32-NEXT: vsll.vi v12, v14, 2
1429 ; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
1430 ; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
1433 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
1435 ; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
1436 ; RV64-NEXT: vzext.vf2 v14, v12
1437 ; RV64-NEXT: vsll.vi v12, v14, 2
1438 ; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
1439 ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
1441 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
1442 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1443 call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

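; Note: zero-extended i16 indices scaled by 4 still fit in 32 bits unsigned,
; so RV64 can keep the indices at e32 and use vsoxei32 here as well.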
define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

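; Note: native i32 indices are already pointer-width on RV32, but RV64 must
; still sign-extend them to e64 (vsext.vf2) before scaling, since GEP indices
; narrower than the pointer are sign-extended.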
define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
ret void
}

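; Note: with an all-true mask the scatter is emitted unmasked (no v0.t).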
define void @vpscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
ret void
}

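; nxv6 is a non-power-of-two element count; the checks below suggest codegen
; simply operates on the containing register group (e64, m8), leaving the
; unused tail elements alone.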
declare void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double>, <vscale x 6 x ptr>, <vscale x 6 x i1>, i32)

define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v18, v16
; RV32-NEXT: vsll.vi v16, v18, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v18, v16
; RV64-NEXT: vsll.vi v16, v18, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v20, v16
; RV64-NEXT: vsll.vi v16, v20, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

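; Note: with native i64 indices RV32 first truncates them to e32 with
; vnsrl.wi, since only the low 32 bits can affect a 32-bit address.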
define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v18, v16
; RV32-NEXT: vsll.vi v16, v18, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v18, v16
; RV64-NEXT: vsll.vi v16, v18, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v20, v16
; RV64-NEXT: vsll.vi v16, v20, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

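; nxv16f64 does not fit in one register group, so the scatter is split in
; two: the first vsoxei covers min(evl, vlenb) elements (VLMAX for e64/m8 is
; vlenb) and the second covers the remainder, with the v0 mask slid down by
; vlenb/8 bytes, i.e. the vlenb mask bits consumed by the first half. On
; RV64 one half of the pointer operand is spilled to the stack, presumably
; because the value and pointer halves alone occupy all four m8 groups.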
declare void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, <vscale x 16 x i1>, i32)

define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl8re32.v v24, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: mv a2, a1
; RV32-NEXT: bltu a1, a0, .LBB99_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a2, a0
; RV32-NEXT: .LBB99_2:
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT: sub a2, a1, a0
; RV32-NEXT: sltu a1, a1, a2
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a0
; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a3, a1, 3
; RV64-NEXT: add a3, a0, a3
; RV64-NEXT: vl8re64.v v24, (a3)
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl8re64.v v24, (a0)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: bltu a2, a1, .LBB99_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB99_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT: sub a0, a2, a1
; RV64-NEXT: sltu a2, a2, a0
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a0, a2, a0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

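; In the split baseidx versions below, RV64 sign-extends each half of the
; i16 index vector (v24 and v26) to e64 separately with vsext.vf4, spilling
; one scaled result to the stack until the second vsoxei64 needs it.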
define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl4re16.v v4, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v24, v4
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB100_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB100_2:
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsext.vf4 v16, v24
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB100_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB100_2:
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

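; Explicitly sign-extending the i16 indices should lower the same way as
; using them directly, since narrow GEP indices are sign-extended anyway;
; the RV32 checks below match the test above, and RV64 differs only in
; register allocation (here vsext.vf4 clobbers v0, so the mask is spilled).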
define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl4re16.v v4, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v24, v4
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB101_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB101_2:
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: li a4, 10
; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v0, v24
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB101_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB101_2:
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 10
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

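; Note: the zero-extended variant avoids the e64 index computation entirely:
; zext i16 indices scaled by 8 fit in 32 bits unsigned, so both targets keep
; the indices at e32 and use vsoxei32, with no stack traffic on RV64.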
define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl4re16.v v4, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vzext.vf2 v24, v4
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB102_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB102_2:
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
; RV32-NEXT: sltu a2, a2, a3
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vl4re16.v v4, (a1)
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v4
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB102_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB102_2:
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
; RV64-NEXT: sltu a2, a2, a3
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}