; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,RV64

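; Masked scatters are lowered to indexed, ordered vector stores (vsoxei). The
; index EEW follows the pointer width, so the same IR selects vsoxei32.v on
; RV32 and vsoxei64.v on RV64.
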
declare void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

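; Scattering a truncated value folds the trunc into vnsrl.wi narrowing shifts
; (one per halving of the element width) ahead of the indexed store.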
define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

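; Pointers formed by getelementptr from a common base become base register
; plus vector offset: the i8 indices are sign-extended to the pointer width
; (vsext.vf4 on RV32, vsext.vf8 on RV64) before the indexed store.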
define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

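; The same tests with i16 elements: SEW becomes e16, and the baseidx variants
; must scale the indices by the 2-byte element size (the doubling
; vadd.vv/vwadd.vv below).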
declare void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

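; With zero-extended i8 indices the extend and the doubling fold into a single
; vwaddu.vv, and the narrow offsets allow a 16-bit-indexed vsoxei16.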
define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

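; i32 elements: the index scaling by the 4-byte element size is done with
; vsll.vi 2 instead of an add.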
declare void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v14, v12
; CHECK-NEXT:    vsll.vi v12, v14, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v16, v12
; CHECK-NEXT:    vsll.vi v12, v16, 2
; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

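; i64 elements: indices are scaled by 8 (vsll.vi 3). On RV32 an i64 index
; vector is first narrowed with vnsrl.wi, since address arithmetic wraps at
; the 32-bit pointer width and only the low XLEN bits of each offset matter.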
declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v18, v16
; CHECK-NEXT:    vsll.vi v16, v18, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v20, v16
; CHECK-NEXT:    vsll.vi v16, v20, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

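; Floating-point scatters reuse the integer lowering; only the IR element type
; changes. The bfloat checks (+zvfbfmin in the RUN lines) match the i16
; patterns above.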
declare void @llvm.masked.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1bf16(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2bf16(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8bf16(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

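; half scatters: identical to the i16/bfloat patterns under both the +zvfh and
; +zvfhmin RUN configurations.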
declare void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

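; float scatters mirror the i32 patterns.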
declare void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v14, v12
; CHECK-NEXT:    vsll.vi v12, v14, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

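; Likewise, zero-extended i16 indices fit in 32 bits, so both targets share a
; single vsoxei32 sequence.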
define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vzext.vf2 v16, v12
; CHECK-NEXT: vsll.vi v12, v16, 2
; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; CHECK-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

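; Native i32 indices can be scaled in place on RV32, while RV64 must first
; sign extend them to the 64-bit pointer width.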
define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

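; The f64 scatters repeat the pattern at e64; RV32 keeps 32-bit index
; operands because its pointers are only 32 bits wide.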
declare void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

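; An all-ones mask lowers to an unmasked indexed store.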
define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1))
  ret void
}

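; An all-zeroes mask stores nothing, so the scatter folds away and only the
; ret remains.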
define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

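; As with f32 above, zero-extended i8 indices fit in 16 bits and allow a
; shared vsoxei16 CHECK block.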
define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v18, v16
; CHECK-NEXT: vsll.vi v16, v18, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; CHECK-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

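; Zero-extended i16 indices fit in 32 bits, giving a shared vsoxei32 block.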
define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vzext.vf2 v20, v16
; CHECK-NEXT: vsll.vi v16, v20, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; CHECK-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

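; For i64 indices RV32 narrows with vnsrl.wi: with 32-bit pointers only the
; low 32 bits of each index can contribute to the address.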
define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

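; An nxv16f64 scatter exceeds the largest register group, so it is split into
; two LMUL-8 halves; the mask for the upper half is peeled off with
; vslidedown. On RV64 the two pointer vectors are LMUL-8 as well, which is
; what forces the spill/reload sequence checked below.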
declare void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr>, <vscale x 8 x ptr>, i64)

define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl4re32.v v28, (a1)
; RV32-NEXT: vl4re32.v v4, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v24, v0, a0
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v4, v0.t
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 5
; RV64-NEXT: sub sp, sp, a2
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: li a3, 24
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 4
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vl8re64.v v8, (a0)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: vl8re64.v v8, (a1)
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: srli a0, a0, 3
; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v24, v0, a0
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 24
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 5
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %p0 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> undef, <vscale x 8 x ptr> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> %p0, <vscale x 8 x ptr> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}

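; For the split scatters with narrow indices, the whole index vector is loaded
; once and each half is extended and scaled separately.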
define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl2r.v v4, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v7, v0, a1
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v24, v4
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vl2r.v v6, (a1)
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v6
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v8, v7
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: sub sp, sp, a2
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vmv8r.v v16, v8
; RV32-NEXT: vl4re16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v7, v0, a1
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vsext.vf2 v24, v8
; RV32-NEXT: vsll.vi v8, v24, 3
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vl4re16.v v4, (a1)
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v4
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v8, v6
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

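; i1 indices are zero extended by materializing 0/1 bytes with vmerge; %idxs
; arrives in v0, so the scatter mask %m is moved up from v9 before the store.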
define void @mscatter_baseidx_zext_nxv1i1_nxv1i8(<vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i1> %idxs, <vscale x 1 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv1i1_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vsoxei8.v v8, (a0), v10, v0.t
; CHECK-NEXT: ret
  %eidxs = zext <vscale x 1 x i1> %idxs to <vscale x 1 x i8>
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 1 x i8> %eidxs
  call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}