; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

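; These tests exercise the lowering of @llvm.masked.scatter to RVV indexed
; stores. With no common base address, the pointer vector itself becomes the
; index operand and the scalar base is x0, so RV32 (32-bit pointers) selects
; vsoxei32.v and RV64 (64-bit pointers) selects vsoxei64.v.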
declare void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

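; The truncstore tests narrow the value in registers before scattering:
; vnsrl.wi with a shift of 0 keeps the low half of each element, and each
; step can only halve SEW, so an i64-to-i8 truncate needs three narrows.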
define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}

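; A mask known to be all-true is dropped, giving an unmasked vsoxei; a mask
; known to be all-false folds the scatter away entirely, leaving only ret.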
define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

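; The baseidx tests scatter through a getelementptr with a vector of indices.
; The indices are sign- or zero-extended to pointer width and scaled by the
; element size before they can be used as byte offsets in the indexed store.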
define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

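; Indexed-store offsets narrower than XLEN are zero-extended by the hardware,
; so a zero-extended index can stay narrow: the test below keeps e16 indices
; and uses vsoxei16, with vwaddu.vv computing 2 * zext(idx) in a single
; widening add.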
define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

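; With native i16 indices, RV32 folds the widening and the doubling into one
; vwadd.vv; RV64 still needs a full vsext.vf4 to e64 followed by vadd.vv.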
define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v14, v12
; CHECK-NEXT:    vsll.vi v12, v14, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v16, v12
; CHECK-NEXT:    vsll.vi v12, v16, 2
; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

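; i32 indices are already pointer-width on RV32, so only the scaling shift is
; needed; RV64 must still sign-extend them to e64 with vsext.vf2.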
define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v18, v16
; CHECK-NEXT:    vsll.vi v16, v18, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v20, v16
; CHECK-NEXT:    vsll.vi v16, v20, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

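; With i64 indices on RV32, only the low 32 bits of each index can affect a
; 32-bit address, so the index vector is first narrowed with vnsrl.wi before
; the shift and the vsoxei32 store.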
define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

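; The floating-point scatters below (f16/f32/f64) reuse the integer lowering:
; only the element type changes, and the index handling is identical.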
declare void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v14, v12
; CHECK-NEXT:    vsll.vi v12, v14, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v16, v12
; CHECK-NEXT:    vsll.vi v12, v16, 2
; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

1476 declare void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

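; The baseidx tests scatter through a getelementptr from a common base: the
; index vector is extended (or narrowed, when the scaled value is known to
; fit) and scaled by the element size with vsll.vi 3.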
define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

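; Zero-extended i8 indexes scaled by 8 are known to fit in 16 bits, so both
; targets can share a single EEW=16 indexed store.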
define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v18, v16
; CHECK-NEXT:    vsll.vi v16, v18, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

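; Zero-extended i16 indexes scaled by 8 fit in 32 bits, so RV32 and RV64
; share an EEW=32 indexed store.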
define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v20, v16
; CHECK-NEXT:    vsll.vi v16, v20, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

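; With native i64 indexes, RV32 truncates them to 32 bits with vnsrl.wi
; before the scatter; RV64 can use them directly.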
define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

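; nxv16f64 does not fit in a single m8 register group, so the scatter is
; split into two m8 operations; the second half's mask is produced by a
; vslidedown.vx of v0 by vlenb/8.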
declare void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr>, <vscale x 8 x ptr>, i64)

define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re32.v v24, (a0)
; RV32-NEXT:    vl4re32.v v28, (a1)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    sub sp, sp, a2
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    vl8re64.v v24, (a0)
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV64-NEXT:    vl8re64.v v16, (a1)
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    srli a0, a0, 3
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a0
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %p0 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> undef, <vscale x 8 x ptr> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> %p0, <vscale x 8 x ptr> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl2r.v v2, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v24, v2
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl2r.v v2, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v2
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    vsext.vf8 v8, v3
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re16.v v4, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v24, v4
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl4re16.v v4, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v4
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    vsext.vf4 v8, v6
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}