1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
7 declare void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
; Masked scatter of <vscale x 1 x i8>: lowered to an indexed unordered store
; (vsoxei32 on RV32 / vsoxei64 on RV64) with base register zero and the pointer
; vector supplied as the index operand.
9 define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
10 ; RV32-LABEL: mscatter_nxv1i8:
12 ; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
13 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
16 ; RV64-LABEL: mscatter_nxv1i8:
18 ; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
19 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
21 call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
25 declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
; nxv2i8 masked scatter. On RV64 the 64-bit pointer vector needs an LMUL-2
; register group, so the index operand moves to v10 (v8/v9 hold the value).
27 define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
28 ; RV32-LABEL: mscatter_nxv2i8:
30 ; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
31 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
34 ; RV64-LABEL: mscatter_nxv2i8:
36 ; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
37 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
39 call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
; trunc i16->i8 feeding a scatter: the truncate is selected as one narrowing
; shift (vnsrl.wi by 0) before the indexed byte store.
43 define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
44 ; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
46 ; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
47 ; RV32-NEXT: vnsrl.wi v8, v8, 0
48 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
51 ; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
53 ; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
54 ; RV64-NEXT: vnsrl.wi v8, v8, 0
55 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
57 %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
58 call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
; trunc i32->i8 feeding a scatter: narrowing is done in two vnsrl.wi steps
; (i32->i16 at e16, then i16->i8 at e8) since vnsrl only halves the EEW.
62 define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
63 ; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
65 ; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
66 ; RV32-NEXT: vnsrl.wi v8, v8, 0
67 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
68 ; RV32-NEXT: vnsrl.wi v8, v8, 0
69 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
72 ; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
74 ; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
75 ; RV64-NEXT: vnsrl.wi v8, v8, 0
76 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
77 ; RV64-NEXT: vnsrl.wi v8, v8, 0
78 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
80 %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
81 call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
; trunc i64->i8 feeding a scatter: three successive vnsrl.wi halvings
; (e32 -> e16 -> e8); the first narrow lands in a scratch register so the
; i64 source group (v8-v9) is not partially overwritten mid-sequence.
85 define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
86 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
88 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
89 ; RV32-NEXT: vnsrl.wi v11, v8, 0
90 ; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
91 ; RV32-NEXT: vnsrl.wi v8, v11, 0
92 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
93 ; RV32-NEXT: vnsrl.wi v8, v8, 0
94 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
97 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
99 ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
100 ; RV64-NEXT: vnsrl.wi v12, v8, 0
101 ; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
102 ; RV64-NEXT: vnsrl.wi v8, v12, 0
103 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
104 ; RV64-NEXT: vnsrl.wi v8, v8, 0
105 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
107 %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
108 call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
112 declare void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
; nxv4i8 masked scatter: pointer vector is LMUL-2 on RV32 (v10) and LMUL-4 on
; RV64 (v12); data stays a single e8/mf2 register.
114 define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
115 ; RV32-LABEL: mscatter_nxv4i8:
117 ; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
118 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
121 ; RV64-LABEL: mscatter_nxv4i8:
123 ; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
124 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
126 call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %m)
; All-ones mask: the mask operand folds away and an unmasked vsoxei is emitted
; (no v0.t suffix).
130 define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
131 ; RV32-LABEL: mscatter_truemask_nxv4i8:
133 ; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
134 ; RV32-NEXT: vsoxei32.v v8, (zero), v10
137 ; RV64-LABEL: mscatter_truemask_nxv4i8:
139 ; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
140 ; RV64-NEXT: vsoxei64.v v8, (zero), v12
142 call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> splat (i1 1))
; All-zero mask: the scatter folds away entirely; only the function label is
; checked (no store instructions expected on either target).
146 define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
147 ; CHECK-LABEL: mscatter_falsemask_nxv4i8:
150 call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
154 declare void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
; nxv8i8 masked scatter: pointers occupy v12+ (LMUL-4) on RV32 and v16+
; (LMUL-8) on RV64.
156 define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
157 ; RV32-LABEL: mscatter_nxv8i8:
159 ; RV32-NEXT: vsetvli a0, zero, e8, m1, ta, ma
160 ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
163 ; RV64-LABEL: mscatter_nxv8i8:
165 ; RV64-NEXT: vsetvli a0, zero, e8, m1, ta, ma
166 ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
168 call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
; Base + vector-of-i8-indices form: the i8 indices are sign-extended to XLEN
; (vsext.vf4 on RV32, vsext.vf8 on RV64) and used directly as byte offsets
; (element size 1, so no shift is needed).
172 define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
173 ; RV32-LABEL: mscatter_baseidx_nxv8i8:
175 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
176 ; RV32-NEXT: vsext.vf4 v12, v9
177 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
178 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
181 ; RV64-LABEL: mscatter_baseidx_nxv8i8:
183 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
184 ; RV64-NEXT: vsext.vf8 v16, v9
185 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
186 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
188 %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
189 call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
193 declare void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
; nxv1i16 masked scatter (align 2): indexed store at e16/mf4.
195 define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
196 ; RV32-LABEL: mscatter_nxv1i16:
198 ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
199 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
202 ; RV64-LABEL: mscatter_nxv1i16:
204 ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
205 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
207 call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
211 declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
; nxv2i16 masked scatter: RV64's 64-bit pointers need an LMUL-2 group, so the
; index register moves to v10.
213 define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
214 ; RV32-LABEL: mscatter_nxv2i16:
216 ; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
217 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
220 ; RV64-LABEL: mscatter_nxv2i16:
222 ; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
223 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
225 call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
; trunc i32->i16 feeding a scatter: one vnsrl.wi narrowing before the e16
; indexed store.
229 define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
230 ; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
232 ; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
233 ; RV32-NEXT: vnsrl.wi v8, v8, 0
234 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
237 ; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
239 ; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
240 ; RV64-NEXT: vnsrl.wi v8, v8, 0
241 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
243 %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
244 call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
; trunc i64->i16 feeding a scatter: two vnsrl.wi halvings (e32 then e16); the
; first narrow goes to a scratch register to avoid clobbering the i64 source
; group mid-sequence.
248 define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
249 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
251 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
252 ; RV32-NEXT: vnsrl.wi v11, v8, 0
253 ; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
254 ; RV32-NEXT: vnsrl.wi v8, v11, 0
255 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
258 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
260 ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
261 ; RV64-NEXT: vnsrl.wi v12, v8, 0
262 ; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
263 ; RV64-NEXT: vnsrl.wi v8, v12, 0
264 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
266 %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
267 call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
271 declare void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
; nxv4i16 masked scatter at e16/m1; pointer group is v10 (RV32) / v12 (RV64).
273 define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
274 ; RV32-LABEL: mscatter_nxv4i16:
276 ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
277 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
280 ; RV64-LABEL: mscatter_nxv4i16:
282 ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
283 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
285 call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
; All-ones mask folds to an unmasked vsoxei (no v0.t).
289 define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
290 ; RV32-LABEL: mscatter_truemask_nxv4i16:
292 ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
293 ; RV32-NEXT: vsoxei32.v v8, (zero), v10
296 ; RV64-LABEL: mscatter_truemask_nxv4i16:
298 ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
299 ; RV64-NEXT: vsoxei64.v v8, (zero), v12
301 call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
; All-zero mask: scatter is dropped; only the label is checked.
305 define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
306 ; CHECK-LABEL: mscatter_falsemask_nxv4i16:
309 call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
313 declare void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
; nxv8i16 masked scatter at e16/m2; pointers in v12 (RV32, LMUL-4) / v16
; (RV64, LMUL-8).
315 define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
316 ; RV32-LABEL: mscatter_nxv8i16:
318 ; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
319 ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
322 ; RV64-LABEL: mscatter_nxv8i16:
324 ; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
325 ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
327 call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
; Base + i8 indices into an i16 array: indices are sign-extended to XLEN, then
; scaled by 2 via vadd.vv (x+x instead of a shift) to form byte offsets.
331 define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
332 ; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
334 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
335 ; RV32-NEXT: vsext.vf4 v12, v10
336 ; RV32-NEXT: vadd.vv v12, v12, v12
337 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
338 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
341 ; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
343 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
344 ; RV64-NEXT: vsext.vf8 v16, v10
345 ; RV64-NEXT: vadd.vv v16, v16, v16
346 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
347 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
349 %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
350 call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
; Explicit sext of i8 indices to i16 before the GEP: lowers identically to the
; implicit-extension case above (sign-extend to XLEN, double, indexed store).
354 define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
355 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
357 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
358 ; RV32-NEXT: vsext.vf4 v12, v10
359 ; RV32-NEXT: vadd.vv v12, v12, v12
360 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
361 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
364 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
366 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
367 ; RV64-NEXT: vsext.vf8 v16, v10
368 ; RV64-NEXT: vadd.vv v16, v16, v16
369 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
370 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
372 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
373 %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
374 call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
; Zero-extended i8 indices: idx*2 is computed in one widening unsigned add
; (vwaddu.vv idx,idx), and the narrow unsigned index enables vsoxei16 on both
; targets — hence a single shared CHECK block.
378 define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
379 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
381 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
382 ; CHECK-NEXT: vwaddu.vv v12, v10, v10
383 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
384 ; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t
386 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
387 %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
388 call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
; Native i16 indices: RV32 fuses extend+double into one widening signed add
; (vwadd.vv) producing 32-bit offsets; RV64 needs a separate vsext.vf4 to
; 64 bits followed by vadd.vv.
392 define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
393 ; RV32-LABEL: mscatter_baseidx_nxv8i16:
395 ; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
396 ; RV32-NEXT: vwadd.vv v12, v10, v10
397 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
400 ; RV64-LABEL: mscatter_baseidx_nxv8i16:
402 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
403 ; RV64-NEXT: vsext.vf4 v16, v10
404 ; RV64-NEXT: vadd.vv v16, v16, v16
405 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
406 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
408 %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
409 call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
413 declare void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
; nxv1i32 masked scatter (align 4) at e32/mf2.
415 define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
416 ; RV32-LABEL: mscatter_nxv1i32:
418 ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
419 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
422 ; RV64-LABEL: mscatter_nxv1i32:
424 ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
425 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
427 call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
431 declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
; nxv2i32 masked scatter at e32/m1; RV64's pointer group moves to v10.
433 define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
434 ; RV32-LABEL: mscatter_nxv2i32:
436 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
437 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
440 ; RV64-LABEL: mscatter_nxv2i32:
442 ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
443 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
445 call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
; trunc i64->i32 feeding a scatter: single vnsrl.wi into a scratch register
; (the i64 source occupies v8-v9), then the indexed word store.
449 define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
450 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
452 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
453 ; RV32-NEXT: vnsrl.wi v11, v8, 0
454 ; RV32-NEXT: vsoxei32.v v11, (zero), v10, v0.t
457 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
459 ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
460 ; RV64-NEXT: vnsrl.wi v12, v8, 0
461 ; RV64-NEXT: vsoxei64.v v12, (zero), v10, v0.t
463 %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
464 call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
468 declare void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
; nxv4i32 masked scatter at e32/m2; pointers in v10 (RV32) / v12 (RV64).
470 define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
471 ; RV32-LABEL: mscatter_nxv4i32:
473 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
474 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
477 ; RV64-LABEL: mscatter_nxv4i32:
479 ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
480 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
482 call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
; All-ones mask folds to an unmasked vsoxei (no v0.t).
486 define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
487 ; RV32-LABEL: mscatter_truemask_nxv4i32:
489 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
490 ; RV32-NEXT: vsoxei32.v v8, (zero), v10
493 ; RV64-LABEL: mscatter_truemask_nxv4i32:
495 ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
496 ; RV64-NEXT: vsoxei64.v v8, (zero), v12
498 call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1))
; All-zero mask: scatter is dropped; only the label is checked.
502 define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
503 ; CHECK-LABEL: mscatter_falsemask_nxv4i32:
506 call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
510 declare void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
; nxv8i32 masked scatter at e32/m4; pointers in v12 (RV32) / v16 (RV64).
512 define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
513 ; RV32-LABEL: mscatter_nxv8i32:
515 ; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
516 ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
519 ; RV64-LABEL: mscatter_nxv8i32:
521 ; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
522 ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
524 call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
; Base + i8 indices into an i32 array: sign-extend indices to XLEN, shift left
; by 2 (scale by element size 4), then indexed store.
528 define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
529 ; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
531 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
532 ; RV32-NEXT: vsext.vf4 v16, v12
533 ; RV32-NEXT: vsll.vi v12, v16, 2
534 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
537 ; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
539 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
540 ; RV64-NEXT: vsext.vf8 v16, v12
541 ; RV64-NEXT: vsll.vi v16, v16, 2
542 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
543 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
545 %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
546 call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
; Explicit sext i8->i32 on the indices: same lowering as the implicit case
; (vsext to XLEN, vsll.vi 2, indexed store).
550 define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
551 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
553 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
554 ; RV32-NEXT: vsext.vf4 v16, v12
555 ; RV32-NEXT: vsll.vi v12, v16, 2
556 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
559 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
561 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
562 ; RV64-NEXT: vsext.vf8 v16, v12
563 ; RV64-NEXT: vsll.vi v16, v16, 2
564 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
565 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
567 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
568 %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
569 call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
; Zero-extended i8 indices: only widened to i16 (vzext.vf2) since the scaled
; unsigned offsets fit 16 bits, enabling vsoxei16 on both targets (shared
; CHECK block).
573 define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
574 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
576 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
577 ; CHECK-NEXT: vzext.vf2 v14, v12
578 ; CHECK-NEXT: vsll.vi v12, v14, 2
579 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
580 ; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t
582 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
583 %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
584 call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
; Base + i16 indices into an i32 array: sign-extend to XLEN (vf2 on RV32,
; vf4 on RV64), scale by 4 with vsll.vi, then indexed store.
588 define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
589 ; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
591 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
592 ; RV32-NEXT: vsext.vf2 v16, v12
593 ; RV32-NEXT: vsll.vi v12, v16, 2
594 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
597 ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
599 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
600 ; RV64-NEXT: vsext.vf4 v16, v12
601 ; RV64-NEXT: vsll.vi v16, v16, 2
602 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
603 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
605 %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
606 call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
; Explicit sext i16->i32 on the indices: lowers the same as the implicit
; variant above.
610 define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
611 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
613 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
614 ; RV32-NEXT: vsext.vf2 v16, v12
615 ; RV32-NEXT: vsll.vi v12, v16, 2
616 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
619 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
621 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
622 ; RV64-NEXT: vsext.vf4 v16, v12
623 ; RV64-NEXT: vsll.vi v16, v16, 2
624 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
625 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
627 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
628 %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
629 call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
; Zero-extended i16 indices: a 32-bit unsigned offset suffices on both
; targets, so RV64 can also use vsoxei32 (shared CHECK block).
633 define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
634 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
636 ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
637 ; CHECK-NEXT: vzext.vf2 v16, v12
638 ; CHECK-NEXT: vsll.vi v12, v16, 2
639 ; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t
641 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
642 %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
643 call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
; Native i32 indices: RV32 only needs the scale-by-4 shift (indices already
; XLEN wide); RV64 sign-extends to 64 bits first (vsext.vf2).
647 define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
648 ; RV32-LABEL: mscatter_baseidx_nxv8i32:
650 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
651 ; RV32-NEXT: vsll.vi v12, v12, 2
652 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
655 ; RV64-LABEL: mscatter_baseidx_nxv8i32:
657 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
658 ; RV64-NEXT: vsext.vf2 v16, v12
659 ; RV64-NEXT: vsll.vi v16, v16, 2
660 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
661 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
663 %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
664 call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
668 declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
; nxv1i64 masked scatter (align 8) at e64/m1.
670 define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
671 ; RV32-LABEL: mscatter_nxv1i64:
673 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
674 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
677 ; RV64-LABEL: mscatter_nxv1i64:
679 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
680 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
682 call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
686 declare void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
; nxv2i64 masked scatter at e64/m2; data in v8-v9, pointers in v10(+).
688 define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
689 ; RV32-LABEL: mscatter_nxv2i64:
691 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
692 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
695 ; RV64-LABEL: mscatter_nxv2i64:
697 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
698 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
700 call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
704 declare void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
; nxv4i64 masked scatter at e64/m4; pointers in v12(+) on both targets.
706 define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
707 ; RV32-LABEL: mscatter_nxv4i64:
709 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
710 ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
713 ; RV64-LABEL: mscatter_nxv4i64:
715 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
716 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
718 call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
; All-ones mask folds to an unmasked vsoxei (no v0.t).
722 define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
723 ; RV32-LABEL: mscatter_truemask_nxv4i64:
725 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
726 ; RV32-NEXT: vsoxei32.v v8, (zero), v12
729 ; RV64-LABEL: mscatter_truemask_nxv4i64:
731 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
732 ; RV64-NEXT: vsoxei64.v v8, (zero), v12
734 call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1))
; All-zero mask: scatter is dropped; only the label is checked.
738 define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
739 ; CHECK-LABEL: mscatter_falsemask_nxv4i64:
742 call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
746 declare void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
; nxv8i64 masked scatter at e64/m8: value fills v8-v15, pointers v16-v23
; (RV32 half-width indices also start at v16).
748 define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
749 ; RV32-LABEL: mscatter_nxv8i64:
751 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
752 ; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
755 ; RV64-LABEL: mscatter_nxv8i64:
757 ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
758 ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
760 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
; Base + i8 indices into an i64 array: sign-extend to XLEN, shift left by 3
; (scale by 8), then indexed store of the m8 value group.
764 define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
765 ; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
767 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
768 ; RV32-NEXT: vsext.vf4 v20, v16
769 ; RV32-NEXT: vsll.vi v16, v20, 3
770 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
771 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
774 ; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
776 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
777 ; RV64-NEXT: vsext.vf8 v24, v16
778 ; RV64-NEXT: vsll.vi v16, v24, 3
779 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
781 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
782 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
; Explicit sext i8->i64 on the indices: lowers identically to the implicit
; variant above.
786 define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
787 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
789 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
790 ; RV32-NEXT: vsext.vf4 v20, v16
791 ; RV32-NEXT: vsll.vi v16, v20, 3
792 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
793 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
796 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
798 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
799 ; RV64-NEXT: vsext.vf8 v24, v16
800 ; RV64-NEXT: vsll.vi v16, v24, 3
801 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
803 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
804 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
805 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
; Zero-extended i8 indices scaled by 8 still fit 16 unsigned bits, so only
; vzext.vf2 is needed and both targets share a vsoxei16 CHECK block.
809 define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
810 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
812 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
813 ; CHECK-NEXT: vzext.vf2 v18, v16
814 ; CHECK-NEXT: vsll.vi v16, v18, 3
815 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
816 ; CHECK-NEXT: vsoxei16.v v8, (a0), v16, v0.t
818 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
819 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
820 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
; Base + i16 indices into an i64 array: sign-extend to XLEN (vf2 on RV32,
; vf4 on RV64), scale by 8 with vsll.vi 3, then indexed store.
824 define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
825 ; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
827 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
828 ; RV32-NEXT: vsext.vf2 v20, v16
829 ; RV32-NEXT: vsll.vi v16, v20, 3
830 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
831 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
834 ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
836 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
837 ; RV64-NEXT: vsext.vf4 v24, v16
838 ; RV64-NEXT: vsll.vi v16, v24, 3
839 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
841 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
842 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
; Explicit sext i16->i64 on the indices: lowers identically to the implicit
; variant above.
846 define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
847 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
849 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
850 ; RV32-NEXT: vsext.vf2 v20, v16
851 ; RV32-NEXT: vsll.vi v16, v20, 3
852 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
853 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
856 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
858 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
859 ; RV64-NEXT: vsext.vf4 v24, v16
860 ; RV64-NEXT: vsll.vi v16, v24, 3
861 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
863 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
864 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
865 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
869 define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
870 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
872 ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
873 ; CHECK-NEXT: vzext.vf2 v20, v16
874 ; CHECK-NEXT: vsll.vi v16, v20, 3
875 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
876 ; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t
878 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
879 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
880 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
884 define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
885 ; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
887 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
888 ; RV32-NEXT: vsll.vi v16, v16, 3
889 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
890 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
893 ; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
895 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
896 ; RV64-NEXT: vsext.vf2 v24, v16
897 ; RV64-NEXT: vsll.vi v16, v24, 3
898 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
900 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
901 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
905 define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
906 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
908 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
909 ; RV32-NEXT: vsll.vi v16, v16, 3
910 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
911 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
914 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
916 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
917 ; RV64-NEXT: vsext.vf2 v24, v16
918 ; RV64-NEXT: vsll.vi v16, v24, 3
919 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
921 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
922 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
923 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
927 define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
928 ; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
930 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
931 ; RV32-NEXT: vsll.vi v16, v16, 3
932 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
933 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
936 ; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
938 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
939 ; RV64-NEXT: vzext.vf2 v24, v16
940 ; RV64-NEXT: vsll.vi v16, v24, 3
941 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
943 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
944 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
945 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
949 define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
950 ; RV32-LABEL: mscatter_baseidx_nxv8i64:
952 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
953 ; RV32-NEXT: vnsrl.wi v24, v16, 0
954 ; RV32-NEXT: vsll.vi v16, v24, 3
955 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
956 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
959 ; RV64-LABEL: mscatter_baseidx_nxv8i64:
961 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
962 ; RV64-NEXT: vsll.vi v16, v16, 3
963 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
965 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
966 call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
970 declare void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
972 define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
973 ; RV32-LABEL: mscatter_nxv1f16:
975 ; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
976 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
979 ; RV64-LABEL: mscatter_nxv1f16:
981 ; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
982 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
984 call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
988 declare void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
990 define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
991 ; RV32-LABEL: mscatter_nxv2f16:
993 ; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
994 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
997 ; RV64-LABEL: mscatter_nxv2f16:
999 ; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
1000 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1002 call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
1006 declare void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
1008 define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
1009 ; RV32-LABEL: mscatter_nxv4f16:
1011 ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
1012 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1015 ; RV64-LABEL: mscatter_nxv4f16:
1017 ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
1018 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1020 call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
1024 define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) {
1025 ; RV32-LABEL: mscatter_truemask_nxv4f16:
1027 ; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
1028 ; RV32-NEXT: vsoxei32.v v8, (zero), v10
1031 ; RV64-LABEL: mscatter_truemask_nxv4f16:
1033 ; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
1034 ; RV64-NEXT: vsoxei64.v v8, (zero), v12
1036 call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
1040 define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) {
1041 ; CHECK-LABEL: mscatter_falsemask_nxv4f16:
1044 call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
1048 declare void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
1050 define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
1051 ; RV32-LABEL: mscatter_nxv8f16:
1053 ; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
1054 ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
1057 ; RV64-LABEL: mscatter_nxv8f16:
1059 ; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
1060 ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
1062 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
1066 define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1067 ; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
1069 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1070 ; RV32-NEXT: vsext.vf4 v12, v10
1071 ; RV32-NEXT: vadd.vv v12, v12, v12
1072 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1073 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1076 ; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
1078 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1079 ; RV64-NEXT: vsext.vf8 v16, v10
1080 ; RV64-NEXT: vadd.vv v16, v16, v16
1081 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1082 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1084 %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
1085 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
1089 define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1090 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
1092 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1093 ; RV32-NEXT: vsext.vf4 v12, v10
1094 ; RV32-NEXT: vadd.vv v12, v12, v12
1095 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1096 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1099 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
1101 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1102 ; RV64-NEXT: vsext.vf8 v16, v10
1103 ; RV64-NEXT: vadd.vv v16, v16, v16
1104 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1105 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1107 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1108 %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
1109 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
1113 define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1114 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
1116 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
1117 ; CHECK-NEXT: vwaddu.vv v12, v10, v10
1118 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1119 ; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t
1121 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1122 %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
1123 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
1127 define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1128 ; RV32-LABEL: mscatter_baseidx_nxv8f16:
1130 ; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
1131 ; RV32-NEXT: vwadd.vv v12, v10, v10
1132 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1135 ; RV64-LABEL: mscatter_baseidx_nxv8f16:
1137 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1138 ; RV64-NEXT: vsext.vf4 v16, v10
1139 ; RV64-NEXT: vadd.vv v16, v16, v16
1140 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1141 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1143 %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
1144 call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
1148 declare void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
1150 define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
1151 ; RV32-LABEL: mscatter_nxv1f32:
1153 ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
1154 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1157 ; RV64-LABEL: mscatter_nxv1f32:
1159 ; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
1160 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1162 call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
1166 declare void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
1168 define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
1169 ; RV32-LABEL: mscatter_nxv2f32:
1171 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1172 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1175 ; RV64-LABEL: mscatter_nxv2f32:
1177 ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1178 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1180 call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
1184 declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
1186 define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
1187 ; RV32-LABEL: mscatter_nxv4f32:
1189 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1190 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1193 ; RV64-LABEL: mscatter_nxv4f32:
1195 ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1196 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1198 call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
1202 define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) {
1203 ; RV32-LABEL: mscatter_truemask_nxv4f32:
1205 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1206 ; RV32-NEXT: vsoxei32.v v8, (zero), v10
1209 ; RV64-LABEL: mscatter_truemask_nxv4f32:
1211 ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1212 ; RV64-NEXT: vsoxei64.v v8, (zero), v12
1214 call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1))
1218 define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) {
1219 ; CHECK-LABEL: mscatter_falsemask_nxv4f32:
1222 call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
1226 declare void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
1228 define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
1229 ; RV32-LABEL: mscatter_nxv8f32:
1231 ; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma
1232 ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
1235 ; RV64-LABEL: mscatter_nxv8f32:
1237 ; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
1238 ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
1240 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
1244 define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1245 ; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
1247 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1248 ; RV32-NEXT: vsext.vf4 v16, v12
1249 ; RV32-NEXT: vsll.vi v12, v16, 2
1250 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1253 ; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
1255 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1256 ; RV64-NEXT: vsext.vf8 v16, v12
1257 ; RV64-NEXT: vsll.vi v16, v16, 2
1258 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1259 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1261 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
1262 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
1266 define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1267 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
1269 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1270 ; RV32-NEXT: vsext.vf4 v16, v12
1271 ; RV32-NEXT: vsll.vi v12, v16, 2
1272 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1275 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
1277 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1278 ; RV64-NEXT: vsext.vf8 v16, v12
1279 ; RV64-NEXT: vsll.vi v16, v16, 2
1280 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1281 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1283 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
1284 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1285 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
1289 define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1290 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
1292 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
1293 ; CHECK-NEXT: vzext.vf2 v14, v12
1294 ; CHECK-NEXT: vsll.vi v12, v14, 2
1295 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1296 ; CHECK-NEXT: vsoxei16.v v8, (a0), v12, v0.t
1298 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
1299 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1300 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
1304 define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1305 ; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
1307 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1308 ; RV32-NEXT: vsext.vf2 v16, v12
1309 ; RV32-NEXT: vsll.vi v12, v16, 2
1310 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1313 ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
1315 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1316 ; RV64-NEXT: vsext.vf4 v16, v12
1317 ; RV64-NEXT: vsll.vi v16, v16, 2
1318 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1319 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1321 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
1322 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
1326 define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1327 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
1329 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1330 ; RV32-NEXT: vsext.vf2 v16, v12
1331 ; RV32-NEXT: vsll.vi v12, v16, 2
1332 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1335 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
1337 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1338 ; RV64-NEXT: vsext.vf4 v16, v12
1339 ; RV64-NEXT: vsll.vi v16, v16, 2
1340 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1341 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1343 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
1344 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1345 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
1349 define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1350 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
1352 ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1353 ; CHECK-NEXT: vzext.vf2 v16, v12
1354 ; CHECK-NEXT: vsll.vi v12, v16, 2
1355 ; CHECK-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1357 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
1358 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1359 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
1363 define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
1364 ; RV32-LABEL: mscatter_baseidx_nxv8f32:
1366 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1367 ; RV32-NEXT: vsll.vi v12, v12, 2
1368 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1371 ; RV64-LABEL: mscatter_baseidx_nxv8f32:
1373 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1374 ; RV64-NEXT: vsext.vf2 v16, v12
1375 ; RV64-NEXT: vsll.vi v16, v16, 2
1376 ; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1377 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1379 %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
1380 call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
1384 declare void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
1386 define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
1387 ; RV32-LABEL: mscatter_nxv1f64:
1389 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1390 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1393 ; RV64-LABEL: mscatter_nxv1f64:
1395 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1396 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1398 call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
1402 declare void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
1404 define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
1405 ; RV32-LABEL: mscatter_nxv2f64:
1407 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1408 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1411 ; RV64-LABEL: mscatter_nxv2f64:
1413 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1414 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1416 call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
1420 declare void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
1422 define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
1423 ; RV32-LABEL: mscatter_nxv4f64:
1425 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1426 ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
1429 ; RV64-LABEL: mscatter_nxv4f64:
1431 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1432 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1434 call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
1438 define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) {
1439 ; RV32-LABEL: mscatter_truemask_nxv4f64:
1441 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1442 ; RV32-NEXT: vsoxei32.v v8, (zero), v12
1445 ; RV64-LABEL: mscatter_truemask_nxv4f64:
1447 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1448 ; RV64-NEXT: vsoxei64.v v8, (zero), v12
1450 call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1))
1454 define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) {
1455 ; CHECK-LABEL: mscatter_falsemask_nxv4f64:
1458 call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
1462 declare void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
1464 define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
1465 ; RV32-LABEL: mscatter_nxv8f64:
1467 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1468 ; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
1471 ; RV64-LABEL: mscatter_nxv8f64:
1473 ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1474 ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
1476 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1480 define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1481 ; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
1483 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1484 ; RV32-NEXT: vsext.vf4 v20, v16
1485 ; RV32-NEXT: vsll.vi v16, v20, 3
1486 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1487 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1490 ; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
1492 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1493 ; RV64-NEXT: vsext.vf8 v24, v16
1494 ; RV64-NEXT: vsll.vi v16, v24, 3
1495 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1497 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
1498 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1502 define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1503 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
1505 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1506 ; RV32-NEXT: vsext.vf4 v20, v16
1507 ; RV32-NEXT: vsll.vi v16, v20, 3
1508 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1509 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1512 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
1514 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1515 ; RV64-NEXT: vsext.vf8 v24, v16
1516 ; RV64-NEXT: vsll.vi v16, v24, 3
1517 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1519 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
1520 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
1521 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1525 define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1526 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
1528 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
1529 ; CHECK-NEXT: vzext.vf2 v18, v16
1530 ; CHECK-NEXT: vsll.vi v16, v18, 3
1531 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1532 ; CHECK-NEXT: vsoxei16.v v8, (a0), v16, v0.t
1534 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
1535 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
1536 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1540 define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1541 ; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
1543 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1544 ; RV32-NEXT: vsext.vf2 v20, v16
1545 ; RV32-NEXT: vsll.vi v16, v20, 3
1546 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1547 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1550 ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
1552 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1553 ; RV64-NEXT: vsext.vf4 v24, v16
1554 ; RV64-NEXT: vsll.vi v16, v24, 3
1555 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1557 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
1558 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1562 define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1563 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
1565 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1566 ; RV32-NEXT: vsext.vf2 v20, v16
1567 ; RV32-NEXT: vsll.vi v16, v20, 3
1568 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1569 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1572 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
1574 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1575 ; RV64-NEXT: vsext.vf4 v24, v16
1576 ; RV64-NEXT: vsll.vi v16, v24, 3
1577 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1579 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
1580 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
1581 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1585 define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1586 ; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
1588 ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1589 ; CHECK-NEXT: vzext.vf2 v20, v16
1590 ; CHECK-NEXT: vsll.vi v16, v20, 3
1591 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1592 ; CHECK-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1594 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
1595 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
1596 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1600 define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
1601 ; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
1603 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1604 ; RV32-NEXT: vsll.vi v16, v16, 3
1605 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1606 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1609 ; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
1611 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1612 ; RV64-NEXT: vsext.vf2 v24, v16
1613 ; RV64-NEXT: vsll.vi v16, v24, 3
1614 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1616 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
1617 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1621 define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
1622 ; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
1624 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1625 ; RV32-NEXT: vsll.vi v16, v16, 3
1626 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1627 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1630 ; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
1632 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1633 ; RV64-NEXT: vsext.vf2 v24, v16
1634 ; RV64-NEXT: vsll.vi v16, v24, 3
1635 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1637 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
1638 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
1639 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
1643 define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
1644 ; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
1646 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1647 ; RV32-NEXT: vsll.vi v16, v16, 3
1648 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1649 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1652 ; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
1654 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1655 ; RV64-NEXT: vzext.vf2 v24, v16
1656 ; RV64-NEXT: vsll.vi v16, v24, 3
1657 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1659 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
1660 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
1661 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
; Scatter <vscale x 8 x double> with native i64 indices.
; RV32: the i64 indices are narrowed to 32 bits with vnsrl.wi (low halves) so the
; cheaper vsoxei32 form can be used. RV64: indices are already e64, only the <<3
; scale is needed.
1665 define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
1666 ; RV32-LABEL: mscatter_baseidx_nxv8f64:
1668 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1669 ; RV32-NEXT: vnsrl.wi v24, v16, 0
1670 ; RV32-NEXT: vsll.vi v16, v24, 3
1671 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1672 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
1675 ; RV64-LABEL: mscatter_baseidx_nxv8f64:
1677 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1678 ; RV64-NEXT: vsll.vi v16, v16, 3
1679 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
1681 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
1682 call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
; Intrinsics for the nxv16f64 tests below: the masked scatter of 16 scalable
; doubles, plus the vector.insert intrinsics used to assemble the nxv16 value
; and pointer vectors from two nxv8 halves (at offsets 0 and 8).
1686 declare void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
1688 declare <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
1689 declare <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr>, <vscale x 8 x ptr>, i64)
; nxv16f64 scatter, legalized as two back-to-back nxv8f64 scatters. The mask for
; the second half is extracted with vslidedown.vx by vlenb/8 (srli a0, a0, 3).
; On RV64 the two e64 pointer operands arrive indirectly and are loaded with
; vl8re64.v; together with the two m8 value groups they exceed the register file,
; so 16*vlenb bytes of stack are reserved for spill/reload (Folded Spill/Reload).
; RV32 loads the pointers as e32 4-register groups (vl4re32.v) and needs no spills.
1691 define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptrs1, <vscale x 16 x i1> %m) {
1692 ; RV32-LABEL: mscatter_nxv16f64:
1694 ; RV32-NEXT: vl4re32.v v28, (a1)
1695 ; RV32-NEXT: vl4re32.v v4, (a0)
1696 ; RV32-NEXT: csrr a0, vlenb
1697 ; RV32-NEXT: srli a0, a0, 3
1698 ; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
1699 ; RV32-NEXT: vslidedown.vx v24, v0, a0
1700 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1701 ; RV32-NEXT: vsoxei32.v v8, (zero), v4, v0.t
1702 ; RV32-NEXT: vmv1r.v v0, v24
1703 ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
1706 ; RV64-LABEL: mscatter_nxv16f64:
1708 ; RV64-NEXT: addi sp, sp, -16
1709 ; RV64-NEXT: .cfi_def_cfa_offset 16
1710 ; RV64-NEXT: csrr a2, vlenb
1711 ; RV64-NEXT: slli a2, a2, 4
1712 ; RV64-NEXT: sub sp, sp, a2
1713 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
1714 ; RV64-NEXT: csrr a2, vlenb
1715 ; RV64-NEXT: slli a2, a2, 3
1716 ; RV64-NEXT: add a2, sp, a2
1717 ; RV64-NEXT: addi a2, a2, 16
1718 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
1719 ; RV64-NEXT: vmv8r.v v16, v8
1720 ; RV64-NEXT: vl8re64.v v8, (a1)
1721 ; RV64-NEXT: addi a1, sp, 16
1722 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
1723 ; RV64-NEXT: vl8re64.v v8, (a0)
1724 ; RV64-NEXT: csrr a0, vlenb
1725 ; RV64-NEXT: srli a0, a0, 3
1726 ; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
1727 ; RV64-NEXT: vslidedown.vx v24, v0, a0
1728 ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
1729 ; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
1730 ; RV64-NEXT: vmv1r.v v0, v24
1731 ; RV64-NEXT: csrr a0, vlenb
1732 ; RV64-NEXT: slli a0, a0, 3
1733 ; RV64-NEXT: add a0, sp, a0
1734 ; RV64-NEXT: addi a0, a0, 16
1735 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1736 ; RV64-NEXT: addi a0, sp, 16
1737 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1738 ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
1739 ; RV64-NEXT: csrr a0, vlenb
1740 ; RV64-NEXT: slli a0, a0, 4
1741 ; RV64-NEXT: add sp, sp, a0
1742 ; RV64-NEXT: addi sp, sp, 16
1744 %p0 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> undef, <vscale x 8 x ptr> %ptrs0, i64 0)
1745 %p1 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> %p0, <vscale x 8 x ptr> %ptrs1, i64 8)
1746 %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
1747 %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
1748 call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %p1, i32 8, <vscale x 16 x i1> %m)
; nxv16f64 scatter with i8 indices. The indices are loaded as a 2-register group
; (vl2r.v into v6, so the halves are v6/v7), sign-extended to the index EEW
; (vsext.vf4 -> e32 on RV32, vsext.vf8 -> e64 on RV64) and scaled by 8 (<<3).
; Split into two nxv8 halves; the second half's mask is v0 slid down by vlenb/8.
1752 define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
1753 ; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
1755 ; RV32-NEXT: vl2r.v v6, (a1)
1756 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
1757 ; RV32-NEXT: vsext.vf4 v24, v6
1758 ; RV32-NEXT: vsll.vi v24, v24, 3
1759 ; RV32-NEXT: csrr a1, vlenb
1760 ; RV32-NEXT: srli a1, a1, 3
1761 ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
1762 ; RV32-NEXT: vslidedown.vx v7, v0, a1
1763 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1764 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
1765 ; RV32-NEXT: vmv1r.v v0, v7
1766 ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
1769 ; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
1771 ; RV64-NEXT: vl2r.v v6, (a1)
1772 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1773 ; RV64-NEXT: vsext.vf8 v24, v6
1774 ; RV64-NEXT: vsll.vi v24, v24, 3
1775 ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
1776 ; RV64-NEXT: vsext.vf8 v8, v7
1777 ; RV64-NEXT: csrr a1, vlenb
1778 ; RV64-NEXT: srli a1, a1, 3
1779 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
1780 ; RV64-NEXT: vslidedown.vx v0, v0, a1
1781 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1782 ; RV64-NEXT: vsll.vi v8, v8, 3
1783 ; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
1785 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i8> %idxs
1786 %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
1787 %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
1788 call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m)
; nxv16f64 scatter with i16 indices. The indices are loaded as a 4-register group
; (vl4re16.v into v4, halves v4/v6), sign-extended to the index EEW (vsext.vf2 ->
; e32 on RV32, vsext.vf4 -> e64 on RV64) and scaled by 8 (<<3). As above, the
; scatter is split into two nxv8 halves with the mask's upper half obtained via
; vslidedown.vx by vlenb/8.
1792 define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
1793 ; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
1795 ; RV32-NEXT: vl4re16.v v4, (a1)
1796 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
1797 ; RV32-NEXT: vsext.vf2 v24, v4
1798 ; RV32-NEXT: vsll.vi v24, v24, 3
1799 ; RV32-NEXT: csrr a1, vlenb
1800 ; RV32-NEXT: srli a1, a1, 3
1801 ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
1802 ; RV32-NEXT: vslidedown.vx v7, v0, a1
1803 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1804 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
1805 ; RV32-NEXT: vmv1r.v v0, v7
1806 ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
1809 ; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
1811 ; RV64-NEXT: vl4re16.v v4, (a1)
1812 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1813 ; RV64-NEXT: vsext.vf4 v24, v4
1814 ; RV64-NEXT: vsll.vi v24, v24, 3
1815 ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
1816 ; RV64-NEXT: vsext.vf4 v8, v6
1817 ; RV64-NEXT: csrr a1, vlenb
1818 ; RV64-NEXT: srli a1, a1, 3
1819 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
1820 ; RV64-NEXT: vslidedown.vx v0, v0, a1
1821 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
1822 ; RV64-NEXT: vsll.vi v8, v8, 3
1823 ; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
1825 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
1826 %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
1827 %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
1828 call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m)
1832 define void @mscatter_baseidx_zext_nxv1i1_nxv1i8(<vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i1> %idxs, <vscale x 1 x i1> %m) {
1833 ; CHECK-LABEL: mscatter_baseidx_zext_nxv1i1_nxv1i8:
1835 ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
1836 ; CHECK-NEXT: vmv.v.i v10, 0
1837 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
1838 ; CHECK-NEXT: vmv1r.v v0, v9
1839 ; CHECK-NEXT: vsoxei8.v v8, (a0), v10, v0.t
1841 %eidxs = zext <vscale x 1 x i1> %idxs to <vscale x 1 x i8>
1842 %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 1 x i8> %eidxs
1843 call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)