; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
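; Test lowering of fixed-length llvm.vp.scatter intrinsics to RVV indexed
; stores. Pointers are XLEN-wide, so RV32 scatters through vsoxei32.v and
; RV64 through vsoxei64.v.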
declare void @llvm.vp.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}
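; Scatters of truncated values fold the truncate into vnsrl.wi narrowing
; shifts (one per halving step) ahead of the indexed store.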
define void @vpscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i16_truncstore_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i16_truncstore_v2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  %tval = trunc <2 x i16> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32_truncstore_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i32_truncstore_v2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  %tval = trunc <2 x i32> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  %tval = trunc <2 x i64> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v4i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}
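; An all-true mask is recognized and the scatter is emitted as an unmasked
; vsoxei.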
define void @vpscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_v4i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10
; RV64-NEXT: ret
  %mhead = insertelement <4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
  call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v8i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}
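; With (base + vector-of-indices) addressing, the indices are extended to
; pointer width (sign extension by default) and scaled by the element size
; before the indexed store.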
define void @vpscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf8 v12, v9
; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32_truncstore_v2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i32_truncstore_v2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  %tval = trunc <2 x i32> %val to <2 x i16>
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  %tval = trunc <2 x i64> %val to <2 x i16>
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}
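; Non-power-of-two vectors such as v3i16 are widened (here to v4i16) and
; reuse the same lowering.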
declare void @llvm.vp.scatter.v3i16.v3p0(<3 x i16>, <3 x ptr>, <3 x i1>, i32)

define void @vpscatter_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v3i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v3i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v3i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_v3i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10
; RV64-NEXT: ret
  %mhead = insertelement <3 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <3 x i1> %mhead, <3 x i1> poison, <3 x i32> zeroinitializer
  call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v4i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_v4i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10
; RV64-NEXT: ret
  %mhead = insertelement <4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
  call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf8 v12, v9
; RV64-NEXT: vadd.vv v12, v12, v12
; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf8 v12, v9
; RV64-NEXT: vadd.vv v12, v12, v12
; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}
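; Zero-extended i8 indices fit in 16 bits, so the scaling is folded into a
; widening add and a narrower vsoxei16 index vector is used.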
define void @vpscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vwaddu.vv v10, v9, v9
; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vwaddu.vv v10, v9, v9
; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v9, v9
; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf4 v12, v9
; RV64-NEXT: vadd.vv v12, v12, v12
; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vnsrl.wi v8, v8, 0
; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vnsrl.wi v8, v8, 0
; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  %tval = trunc <2 x i64> %val to <2 x i32>
  call void @llvm.vp.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v4i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_v4i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10
; RV64-NEXT: ret
  %mhead = insertelement <4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
  call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf8 v12, v10
; RV64-NEXT: vsll.vi v12, v12, 2
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf8 v12, v10
; RV64-NEXT: vsll.vi v12, v12, 2
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vzext.vf2 v11, v10
; RV32-NEXT: vsll.vi v10, v11, 2
; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vzext.vf2 v11, v10
; RV64-NEXT: vsll.vi v10, v11, 2
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf4 v12, v10
; RV64-NEXT: vsll.vi v12, v12, 2
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf4 v12, v10
; RV64-NEXT: vsll.vi v12, v12, 2
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vzext.vf2 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vzext.vf2 v12, v10
; RV64-NEXT: vsll.vi v10, v12, 2
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}
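; i32 indices already match the pointer width on RV32 and are scaled in
; place; RV64 must still sign-extend them to e64 first.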
define void @vpscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v10, v10, 2
; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf2 v12, v10
; RV64-NEXT: vsll.vi v12, v12, 2
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10
; RV64-NEXT: ret
  %mhead = insertelement <4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
  call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vzext.vf2 v13, v12
; RV32-NEXT: vsll.vi v12, v13, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vzext.vf2 v13, v12
; RV64-NEXT: vsll.vi v12, v13, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vzext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vzext.vf2 v14, v12
; RV64-NEXT: vsll.vi v12, v14, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vzext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}
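; On RV32, i64 indices are narrowed to e32 with vnsrl.wi before scaling,
; since the index vector only needs to cover the pointer width.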
define void @vpscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vnsrl.wi v16, v12, 0
; RV32-NEXT: vsll.vi v12, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsll.vi v12, v12, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}
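; Floating-point scatters lower the same way as integer scatters of the
; same element size.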
981 declare void @llvm.vp.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, <2 x i1>, i32)
983 define void @vpscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
984 ; RV32-LABEL: vpscatter_v2f16:
986 ; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
987 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
990 ; RV64-LABEL: vpscatter_v2f16:
992 ; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
993 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
995 call void @llvm.vp.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
999 declare void @llvm.vp.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, <4 x i1>, i32)
1001 define void @vpscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1002 ; RV32-LABEL: vpscatter_v4f16:
1004 ; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1005 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1008 ; RV64-LABEL: vpscatter_v4f16:
1010 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1011 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1013 call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1017 define void @vpscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
1018 ; RV32-LABEL: vpscatter_truemask_v4f16:
1020 ; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1021 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
1024 ; RV64-LABEL: vpscatter_truemask_v4f16:
1026 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1027 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
1029 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
1030 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
1031 call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
1035 declare void @llvm.vp.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, <8 x i1>, i32)
1037 define void @vpscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1038 ; RV32-LABEL: vpscatter_v8f16:
1040 ; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
1041 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1044 ; RV64-LABEL: vpscatter_v8f16:
1046 ; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
1047 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1049 call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1053 define void @vpscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1054 ; RV32-LABEL: vpscatter_baseidx_v8i8_v8f16:
1056 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1057 ; RV32-NEXT: vsext.vf4 v10, v9
1058 ; RV32-NEXT: vadd.vv v10, v10, v10
1059 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1060 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1063 ; RV64-LABEL: vpscatter_baseidx_v8i8_v8f16:
1065 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1066 ; RV64-NEXT: vsext.vf8 v12, v9
1067 ; RV64-NEXT: vadd.vv v12, v12, v12
1068 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1069 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1071 %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
1072 call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1076 define void @vpscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1077 ; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f16:
1079 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1080 ; RV32-NEXT: vsext.vf4 v10, v9
1081 ; RV32-NEXT: vadd.vv v10, v10, v10
1082 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1083 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1086 ; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f16:
1088 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1089 ; RV64-NEXT: vsext.vf8 v12, v9
1090 ; RV64-NEXT: vadd.vv v12, v12, v12
1091 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1092 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1094 %eidxs = sext <8 x i8> %idxs to <8 x i16>
1095 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
1096 call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1100 define void @vpscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1101 ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f16:
1103 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1104 ; RV32-NEXT: vwaddu.vv v10, v9, v9
1105 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1106 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1109 ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f16:
1111 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1112 ; RV64-NEXT: vwaddu.vv v10, v9, v9
1113 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1114 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1116 %eidxs = zext <8 x i8> %idxs to <8 x i16>
1117 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
1118 call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1122 define void @vpscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1123 ; RV32-LABEL: vpscatter_baseidx_v8f16:
1125 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1126 ; RV32-NEXT: vwadd.vv v10, v9, v9
1127 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1128 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1131 ; RV64-LABEL: vpscatter_baseidx_v8f16:
1133 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1134 ; RV64-NEXT: vsext.vf4 v12, v9
1135 ; RV64-NEXT: vadd.vv v12, v12, v12
1136 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1137 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1139 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
1140 call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1144 declare void @llvm.vp.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, <2 x i1>, i32)
1146 define void @vpscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1147 ; RV32-LABEL: vpscatter_v2f32:
1149 ; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1150 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1153 ; RV64-LABEL: vpscatter_v2f32:
1155 ; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1156 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1158 call void @llvm.vp.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1162 declare void @llvm.vp.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, <4 x i1>, i32)
1164 define void @vpscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1165 ; RV32-LABEL: vpscatter_v4f32:
1167 ; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1168 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1171 ; RV64-LABEL: vpscatter_v4f32:
1173 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1174 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1176 call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1180 define void @vpscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
1181 ; RV32-LABEL: vpscatter_truemask_v4f32:
1183 ; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1184 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
1187 ; RV64-LABEL: vpscatter_truemask_v4f32:
1189 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1190 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
1192 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
1193 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
1194 call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
1198 declare void @llvm.vp.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, <8 x i1>, i32)
1200 define void @vpscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1201 ; RV32-LABEL: vpscatter_v8f32:
1203 ; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1204 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1207 ; RV64-LABEL: vpscatter_v8f32:
1209 ; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1210 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1212 call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1216 define void @vpscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1217 ; RV32-LABEL: vpscatter_baseidx_v8i8_v8f32:
1219 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1220 ; RV32-NEXT: vsext.vf4 v12, v10
1221 ; RV32-NEXT: vsll.vi v10, v12, 2
1222 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1223 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1226 ; RV64-LABEL: vpscatter_baseidx_v8i8_v8f32:
1228 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1229 ; RV64-NEXT: vsext.vf8 v12, v10
1230 ; RV64-NEXT: vsll.vi v12, v12, 2
1231 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1232 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1234 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
1235 call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1239 define void @vpscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1240 ; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f32:
1242 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1243 ; RV32-NEXT: vsext.vf4 v12, v10
1244 ; RV32-NEXT: vsll.vi v10, v12, 2
1245 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1246 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1249 ; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f32:
1251 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1252 ; RV64-NEXT: vsext.vf8 v12, v10
1253 ; RV64-NEXT: vsll.vi v12, v12, 2
1254 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1255 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1257 %eidxs = sext <8 x i8> %idxs to <8 x i32>
1258 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1259 call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1263 define void @vpscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1264 ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f32:
1266 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1267 ; RV32-NEXT: vzext.vf2 v11, v10
1268 ; RV32-NEXT: vsll.vi v10, v11, 2
1269 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1270 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1273 ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f32:
1275 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1276 ; RV64-NEXT: vzext.vf2 v11, v10
1277 ; RV64-NEXT: vsll.vi v10, v11, 2
1278 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1279 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1281 %eidxs = zext <8 x i8> %idxs to <8 x i32>
1282 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1283 call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1287 define void @vpscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1288 ; RV32-LABEL: vpscatter_baseidx_v8i16_v8f32:
1290 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1291 ; RV32-NEXT: vsext.vf2 v12, v10
1292 ; RV32-NEXT: vsll.vi v10, v12, 2
1293 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1294 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1297 ; RV64-LABEL: vpscatter_baseidx_v8i16_v8f32:
1299 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1300 ; RV64-NEXT: vsext.vf4 v12, v10
1301 ; RV64-NEXT: vsll.vi v12, v12, 2
1302 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1303 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1305 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
1306 call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1310 define void @vpscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1311 ; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f32:
1313 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1314 ; RV32-NEXT: vsext.vf2 v12, v10
1315 ; RV32-NEXT: vsll.vi v10, v12, 2
1316 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1317 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1320 ; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f32:
1322 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1323 ; RV64-NEXT: vsext.vf4 v12, v10
1324 ; RV64-NEXT: vsll.vi v12, v12, 2
1325 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1326 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1328 %eidxs = sext <8 x i16> %idxs to <8 x i32>
1329 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1330 call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1334 define void @vpscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1335 ; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f32:
1337 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1338 ; RV32-NEXT: vzext.vf2 v12, v10
1339 ; RV32-NEXT: vsll.vi v10, v12, 2
1340 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1341 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1344 ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f32:
1346 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1347 ; RV64-NEXT: vzext.vf2 v12, v10
1348 ; RV64-NEXT: vsll.vi v10, v12, 2
1349 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1350 ; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1352 %eidxs = zext <8 x i16> %idxs to <8 x i32>
1353 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1354 call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1358 define void @vpscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1359 ; RV32-LABEL: vpscatter_baseidx_v8f32:
1361 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1362 ; RV32-NEXT: vsll.vi v10, v10, 2
1363 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1364 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1367 ; RV64-LABEL: vpscatter_baseidx_v8f32:
1369 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1370 ; RV64-NEXT: vsext.vf2 v12, v10
1371 ; RV64-NEXT: vsll.vi v12, v12, 2
1372 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1373 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1375 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
1376 call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1380 declare void @llvm.vp.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, <2 x i1>, i32)
1382 define void @vpscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1383 ; RV32-LABEL: vpscatter_v2f64:
1385 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1386 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1389 ; RV64-LABEL: vpscatter_v2f64:
1391 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1392 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1394 call void @llvm.vp.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1398 declare void @llvm.vp.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, <4 x i1>, i32)
1400 define void @vpscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1401 ; RV32-LABEL: vpscatter_v4f64:
1403 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1404 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1407 ; RV64-LABEL: vpscatter_v4f64:
1409 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1410 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1412 call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1416 define void @vpscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
1417 ; RV32-LABEL: vpscatter_truemask_v4f64:
1419 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1420 ; RV32-NEXT: vsoxei32.v v8, (zero), v10
1423 ; RV64-LABEL: vpscatter_truemask_v4f64:
1425 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1426 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
1428 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
1429 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
1430 call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
1434 declare void @llvm.vp.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, <8 x i1>, i32)
1436 define void @vpscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1437 ; RV32-LABEL: vpscatter_v8f64:
1439 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1440 ; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
1443 ; RV64-LABEL: vpscatter_v8f64:
1445 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1446 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1448 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1452 define void @vpscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1453 ; RV32-LABEL: vpscatter_baseidx_v8i8_v8f64:
1455 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1456 ; RV32-NEXT: vsext.vf4 v14, v12
1457 ; RV32-NEXT: vsll.vi v12, v14, 3
1458 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1459 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1462 ; RV64-LABEL: vpscatter_baseidx_v8i8_v8f64:
1464 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1465 ; RV64-NEXT: vsext.vf8 v16, v12
1466 ; RV64-NEXT: vsll.vi v12, v16, 3
1467 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1468 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1470 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
1471 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1475 define void @vpscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1476 ; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f64:
1478 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1479 ; RV32-NEXT: vsext.vf4 v14, v12
1480 ; RV32-NEXT: vsll.vi v12, v14, 3
1481 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1482 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1485 ; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f64:
1487 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1488 ; RV64-NEXT: vsext.vf8 v16, v12
1489 ; RV64-NEXT: vsll.vi v12, v16, 3
1490 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1491 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1493 %eidxs = sext <8 x i8> %idxs to <8 x i64>
1494 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1495 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1499 define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1500 ; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f64:
1502 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1503 ; RV32-NEXT: vzext.vf2 v13, v12
1504 ; RV32-NEXT: vsll.vi v12, v13, 3
1505 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1506 ; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
1509 ; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f64:
1511 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1512 ; RV64-NEXT: vzext.vf2 v13, v12
1513 ; RV64-NEXT: vsll.vi v12, v13, 3
1514 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1515 ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
1517 %eidxs = zext <8 x i8> %idxs to <8 x i64>
1518 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1519 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1523 define void @vpscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1524 ; RV32-LABEL: vpscatter_baseidx_v8i16_v8f64:
1526 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1527 ; RV32-NEXT: vsext.vf2 v14, v12
1528 ; RV32-NEXT: vsll.vi v12, v14, 3
1529 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1530 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1533 ; RV64-LABEL: vpscatter_baseidx_v8i16_v8f64:
1535 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1536 ; RV64-NEXT: vsext.vf4 v16, v12
1537 ; RV64-NEXT: vsll.vi v12, v16, 3
1538 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1539 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1541 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
1542 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1546 define void @vpscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1547 ; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f64:
1549 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1550 ; RV32-NEXT: vsext.vf2 v14, v12
1551 ; RV32-NEXT: vsll.vi v12, v14, 3
1552 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1553 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1556 ; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f64:
1558 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1559 ; RV64-NEXT: vsext.vf4 v16, v12
1560 ; RV64-NEXT: vsll.vi v12, v16, 3
1561 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1562 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1564 %eidxs = sext <8 x i16> %idxs to <8 x i64>
1565 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1566 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1570 define void @vpscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1571 ; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f64:
1573 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1574 ; RV32-NEXT: vzext.vf2 v14, v12
1575 ; RV32-NEXT: vsll.vi v12, v14, 3
1576 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1577 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1580 ; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f64:
1582 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1583 ; RV64-NEXT: vzext.vf2 v14, v12
1584 ; RV64-NEXT: vsll.vi v12, v14, 3
1585 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1586 ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
1588 %eidxs = zext <8 x i16> %idxs to <8 x i64>
1589 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1590 call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
define void @vpscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
ret void
}

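; An explicit sign extension of the i32 indices folds away: RV32 truncates
; the 64-bit offsets back to 32 bits (legal for 32-bit pointers), and RV64
; produces the same code as the plain-i32 case.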
define void @vpscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
ret void
}

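; The zero-extended variant is analogous; RV64 merely uses vzext.vf2 in
; place of vsext.vf2.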
define void @vpscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vzext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
ret void
}

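; i64 indices: RV64 uses them directly, while RV32 narrows them to e32 with
; vnsrl before scaling, since it only needs 32-bit offsets.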
define void @vpscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vnsrl.wi v16, v12, 0
; RV32-NEXT: vsll.vi v12, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vsll.vi v12, v12, 3
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
ret void
}

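; A <32 x double> operand does not fit in a single m8 register group, so the
; scatter is split into two 16-element halves: the EVL is clamped to 16 for
; the first half, the remainder is computed branchlessly (sltu/addi/and), and
; the mask is slid down by two bytes (16 bits) for the second half.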
declare void @llvm.vp.scatter.v32f64.v32p0(<32 x double>, <32 x ptr>, <32 x i1>, i32)

define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a0)
; RV32-NEXT: li a2, 16
; RV32-NEXT: mv a0, a1
; RV32-NEXT: bltu a1, a2, .LBB79_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a0, 16
; RV32-NEXT: .LBB79_2:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT: addi a0, a1, -16
; RV32-NEXT: sltu a1, a1, a0
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (zero), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v24, (a1)
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: li a1, 16
; RV64-NEXT: mv a0, a2
; RV64-NEXT: bltu a2, a1, .LBB79_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 16
; RV64-NEXT: .LBB79_2:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT: addi a0, a2, -16
; RV64-NEXT: sltu a1, a2, a0
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
ret void
}

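; With a shared base pointer and 32 i32 indices, RV64 has to materialize two
; e64 index vectors from the i32 index vector and runs short of vector
; registers, so the original mask and the upper half of the value operand are
; spilled to a 10*vlenb scratch area on the stack.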
define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB80_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB80_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: sltu a2, a2, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: li a4, 10
; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v0, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v0
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsext.vf2 v0, v24
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB80_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB80_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: sltu a2, a2, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 10
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs
call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
ret void
}

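; The sign-extended form is split the same way; the RV64 code differs only in
; the order in which the two index halves are extended and scaled.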
define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB81_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB81_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: sltu a2, a2, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: li a4, 10
; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v0, v24
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v24, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v24
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB81_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB81_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: sltu a2, a2, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 10
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = sext <32 x i32> %idxs to <32 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
ret void
}

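; Identical structure for the zero-extended form, with vzext.vf2 instead of
; vsext.vf2.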
define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: li a3, 16
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB82_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB82_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: sltu a2, a2, a1
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: li a4, 10
; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v0, v24
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v24, v24, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v16, v24
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB82_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB82_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: sltu a2, a2, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: and a1, a2, a1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 10
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = zext <32 x i32> %idxs to <32 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
ret void
}