; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
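; Check lowering of llvm.masked.gather with scalable vector types on both RV32
; and RV64: the gathers are expected to become indexed loads (vluxei32/vluxei64).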
declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
define <vscale x 1 x i8> @mgather_nxv1i8(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru) {
; RV32-LABEL: mgather_nxv1i8:
; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64-LABEL: mgather_nxv1i8:
; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
  %v = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru)
  ret <vscale x 1 x i8> %v
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
define <vscale x 2 x i8> @mgather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64-LABEL: mgather_nxv2i8:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v10
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  ret <vscale x 2 x i8> %v
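; The gathers below are followed by a sign or zero extension of the loaded i8
; elements; the extend should be emitted as a vsext.vf*/vzext.vf* after the
; indexed load rather than widening the load itself.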
define <vscale x 2 x i16> @mgather_nxv2i8_sextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i16:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vsext.vf2 v8, v9
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i16:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT: vsext.vf2 v8, v10
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
define <vscale x 2 x i16> @mgather_nxv2i8_zextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i16:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT: vzext.vf2 v8, v9
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i16:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT: vzext.vf2 v8, v10
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
define <vscale x 2 x i32> @mgather_nxv2i8_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i32:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT: vsext.vf4 v8, v9
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i32:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT: vsext.vf4 v8, v10
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
define <vscale x 2 x i32> @mgather_nxv2i8_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i32:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT: vzext.vf4 v8, v9
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i32:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT: vzext.vf4 v8, v10
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
define <vscale x 2 x i64> @mgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i64:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vsext.vf8 v10, v9
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i64:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vsext.vf8 v8, v10
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
define <vscale x 2 x i64> @mgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i64:
; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vzext.vf8 v10, v9
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i64:
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vzext.vf8 v8, v10
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
define <vscale x 4 x i8> @mgather_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_nxv4i8:
; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v10
; RV64-LABEL: mgather_nxv4i8:
; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v12
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %m, <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
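; An all-ones mask should lower to an unmasked indexed load, and an all-zeros
; mask should fold the gather away, leaving only a copy of the passthru value.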
define <vscale x 4 x i8> @mgather_truemask_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i8:
; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8
; RV32-NEXT: vmv1r.v v8, v10
; RV64-LABEL: mgather_truemask_nxv4i8:
; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv1r.v v8, v12
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %mtrue, <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
define <vscale x 4 x i8> @mgather_falsemask_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i8:
; RV32-NEXT: vmv1r.v v8, v10
; RV64-LABEL: mgather_falsemask_nxv4i8:
; RV64-NEXT: vmv1r.v v8, v12
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
declare <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
define <vscale x 8 x i8> @mgather_nxv8i8(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru) {
; RV32-LABEL: mgather_nxv8i8:
; RV32-NEXT: vsetvli a0, zero, e8, m1, ta, mu
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_nxv8i8:
; RV64-NEXT: vsetvli a0, zero, e8, m1, ta, mu
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
  %v = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru)
  ret <vscale x 8 x i8> %v
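; The baseidx tests gather from base + vector-of-indices addresses: the indices
; are extended to the index width usable by vluxei32/vluxei64 and, where
; needed, scaled by the element size before the indexed load.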
define <vscale x 8 x i8> @mgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV64-LABEL: mgather_baseidx_nxv8i8:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT: vluxei64.v v9, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v9
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru)
  ret <vscale x 8 x i8> %v
declare <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)
define <vscale x 1 x i16> @mgather_nxv1i16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i16> %passthru) {
; RV32-LABEL: mgather_nxv1i16:
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64-LABEL: mgather_nxv1i16:
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
  %v = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m, <vscale x 1 x i16> %passthru)
  ret <vscale x 1 x i16> %v
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
define <vscale x 2 x i16> @mgather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64-LABEL: mgather_nxv2i16:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v10
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  ret <vscale x 2 x i16> %v
define <vscale x 2 x i32> @mgather_nxv2i16_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i32:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT: vsext.vf2 v8, v9
; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i32:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT: vsext.vf2 v8, v10
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
define <vscale x 2 x i32> @mgather_nxv2i16_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i32:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT: vzext.vf2 v8, v9
; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i32:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT: vzext.vf2 v8, v10
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
define <vscale x 2 x i64> @mgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i64:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i64:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vsext.vf4 v8, v10
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
define <vscale x 2 x i64> @mgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i64:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vzext.vf4 v10, v9
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i64:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vzext.vf4 v8, v10
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
define <vscale x 4 x i16> @mgather_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_nxv4i16:
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv4i16:
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m, <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
define <vscale x 4 x i16> @mgather_truemask_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i16:
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_truemask_nxv4i16:
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mtrue, <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
define <vscale x 4 x i16> @mgather_falsemask_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i16:
; RV32-NEXT: vmv1r.v v8, v10
; RV64-LABEL: mgather_falsemask_nxv4i16:
; RV64-NEXT: vmv1r.v v8, v12
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
define <vscale x 8 x i16> @mgather_nxv8i16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_nxv8i16:
; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_nxv8i16:
; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
define <vscale x 8 x i16> @mgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i16:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i16:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v10
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
define <vscale x 8 x i16> @mgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v10
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
define <vscale x 8 x i16> @mgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v12, v8, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vluxei16.v v10, (a0), v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
define <vscale x 8 x i16> @mgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16:
; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; RV32-NEXT: vwadd.vv v12, v8, v8
; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_baseidx_nxv8i16:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v10
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
declare <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
define <vscale x 1 x i32> @mgather_nxv1i32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i32> %passthru) {
; RV32-LABEL: mgather_nxv1i32:
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64-LABEL: mgather_nxv1i32:
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
  %v = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m, <vscale x 1 x i32> %passthru)
  ret <vscale x 1 x i32> %v
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
define <vscale x 2 x i32> @mgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV64-LABEL: mgather_nxv2i32:
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v10
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  ret <vscale x 2 x i32> %v
define <vscale x 2 x i64> @mgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32_sextload_nxv2i64:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vsext.vf2 v10, v9
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv2i32_sextload_nxv2i64:
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vsext.vf2 v8, v10
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  %ev = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
define <vscale x 2 x i64> @mgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32_zextload_nxv2i64:
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vzext.vf2 v10, v9
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv2i32_zextload_nxv2i64:
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vzext.vf2 v8, v10
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  %ev = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @mgather_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_nxv4i32:
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv4i32:
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
define <vscale x 4 x i32> @mgather_truemask_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i32:
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV64-LABEL: mgather_truemask_nxv4i32:
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mtrue, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
define <vscale x 4 x i32> @mgather_falsemask_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i32:
; RV32-NEXT: vmv2r.v v8, v10
; RV64-LABEL: mgather_falsemask_nxv4i32:
; RV64-NEXT: vmv2r.v v8, v12
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
define <vscale x 8 x i32> @mgather_nxv8i32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_nxv8i32:
; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_nxv8i32:
; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
define <vscale x 8 x i32> @mgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i32:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsext.vf4 v16, v8
; RV32-NEXT: vsll.vi v8, v16, 2
; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i32:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsext.vf4 v16, v8
; RV32-NEXT: vsll.vi v8, v16, 2
; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: vsll.vi v8, v10, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
define <vscale x 8 x i32> @mgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i32:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsext.vf2 v16, v8
; RV32-NEXT: vsll.vi v8, v16, 2
; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i32:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsext.vf2 v16, v8
; RV32-NEXT: vsll.vi v8, v16, 2
; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32:
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; CHECK-NEXT: vzext.vf2 v16, v8
; CHECK-NEXT: vsll.vi v8, v16, 2
; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
define <vscale x 8 x i32> @mgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i32:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_baseidx_nxv8i32:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v8
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
define <vscale x 1 x i64> @mgather_nxv1i64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i64> %passthru) {
; RV32-LABEL: mgather_nxv1i64:
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV64-LABEL: mgather_nxv1i64:
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v9
  %v = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x i64> %passthru)
  ret <vscale x 1 x i64> %v
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
define <vscale x 2 x i64> @mgather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i64> %passthru) {
; RV32-LABEL: mgather_nxv2i64:
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv2i64:
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v10
  %v = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m, <vscale x 2 x i64> %passthru)
  ret <vscale x 2 x i64> %v
declare <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
define <vscale x 4 x i64> @mgather_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i64> %passthru) {
; RV32-LABEL: mgather_nxv4i64:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_nxv4i64:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m, <vscale x 4 x i64> %passthru)
  ret <vscale x 4 x i64> %v
define <vscale x 4 x i64> @mgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i64> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i64:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT: vluxei32.v v12, (zero), v8
; RV32-NEXT: vmv.v.v v8, v12
; RV64-LABEL: mgather_truemask_nxv4i64:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %mtrue, <vscale x 4 x i64> %passthru)
  ret <vscale x 4 x i64> %v
define <vscale x 4 x i64> @mgather_falsemask_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i64> %passthru) {
; CHECK-LABEL: mgather_falsemask_nxv4i64:
; CHECK-NEXT: vmv4r.v v8, v12
  %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i64> %passthru)
  ret <vscale x 4 x i64> %v
declare <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
define <vscale x 8 x i64> @mgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_nxv8i64:
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV64-LABEL: mgather_nxv8i64:
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
define <vscale x 8 x i64> @mgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i64:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i64:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v8
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v8
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64:
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: vsll.vi v8, v10, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
1040 ret <vscale x 8 x i64> %v
1043 define <vscale x 8 x i64> @mgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
1044 ; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i64:
1046 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1047 ; RV32-NEXT: vsext.vf2 v12, v8
1048 ; RV32-NEXT: vsll.vi v8, v12, 3
1049 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1050 ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
1051 ; RV32-NEXT: vmv.v.v v8, v16
1054 ; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i64:
1056 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
1057 ; RV64-NEXT: vsext.vf4 v24, v8
1058 ; RV64-NEXT: vsll.vi v8, v24, 3
1059 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
1060 ; RV64-NEXT: vmv.v.v v8, v16
1062 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
1063 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
1064 ret <vscale x 8 x i64> %v
1067 define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
1068 ; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64:
1070 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1071 ; RV32-NEXT: vsext.vf2 v12, v8
1072 ; RV32-NEXT: vsll.vi v8, v12, 3
1073 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1074 ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
1075 ; RV32-NEXT: vmv.v.v v8, v16
1078 ; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64:
1080 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
1081 ; RV64-NEXT: vsext.vf4 v24, v8
1082 ; RV64-NEXT: vsll.vi v8, v24, 3
1083 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
1084 ; RV64-NEXT: vmv.v.v v8, v16
1086 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
1087 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
1088 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
1089 ret <vscale x 8 x i64> %v
1092 define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
1093 ; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64:
1095 ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1096 ; CHECK-NEXT: vzext.vf2 v12, v8
1097 ; CHECK-NEXT: vsll.vi v8, v12, 3
1098 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1099 ; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t
1100 ; CHECK-NEXT: vmv.v.v v8, v16
1102 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
1103 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
1104 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
1105 ret <vscale x 8 x i64> %v
1108 define <vscale x 8 x i64> @mgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
1109 ; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8i64:
1111 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1112 ; RV32-NEXT: vsll.vi v8, v8, 3
1113 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1114 ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
1115 ; RV32-NEXT: vmv.v.v v8, v16
1118 ; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8i64:
1120 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
1121 ; RV64-NEXT: vsext.vf2 v24, v8
1122 ; RV64-NEXT: vsll.vi v8, v24, 3
1123 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
1124 ; RV64-NEXT: vmv.v.v v8, v16
1126 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
1127 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
1128 ret <vscale x 8 x i64> %v
1131 define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
1132 ; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64:
1134 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1135 ; RV32-NEXT: vsll.vi v8, v8, 3
1136 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1137 ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
1138 ; RV32-NEXT: vmv.v.v v8, v16
1141 ; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64:
1143 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
1144 ; RV64-NEXT: vsext.vf2 v24, v8
1145 ; RV64-NEXT: vsll.vi v8, v24, 3
1146 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
1147 ; RV64-NEXT: vmv.v.v v8, v16
1149 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
1150 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
1151 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
1152 ret <vscale x 8 x i64> %v
1155 define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
1156 ; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64:
1158 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1159 ; RV32-NEXT: vsll.vi v8, v8, 3
1160 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1161 ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
1162 ; RV32-NEXT: vmv.v.v v8, v16
1165 ; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64:
1167 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
1168 ; RV64-NEXT: vzext.vf2 v24, v8
1169 ; RV64-NEXT: vsll.vi v8, v24, 3
1170 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
1171 ; RV64-NEXT: vmv.v.v v8, v16
1173 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
1174 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
1175 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
1176 ret <vscale x 8 x i64> %v
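; With native i64 indices on RV32, the index vector is first narrowed to i32
; with vnsrl so that vluxei32 can be used, while RV64 uses the indices directly.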
define <vscale x 8 x i64> @mgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i64:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v8, 0
; RV32-NEXT: vsll.vi v8, v24, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV64-LABEL: mgather_baseidx_nxv8i64:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v16
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
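; A gather producing nxv16i64 does not fit in a single register group, so it is
; split into two m8 gathers and the two halves are stored out separately.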
declare <vscale x 16 x i64> @llvm.masked.gather.nxv16i64.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i64>)
declare <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64 %idx)
declare <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr>, <vscale x 8 x ptr>, i64 %idx)
define void @mgather_nxv16i64(<vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptrs1, <vscale x 16 x i1> %m, <vscale x 8 x i64> %passthru0, <vscale x 8 x i64> %passthru1, <vscale x 16 x i64>* %out) {
; RV32-LABEL: mgather_nxv16i64:
; RV32-NEXT: vl8re64.v v24, (a0)
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: srli a2, a0, 3
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v24, (zero), v12, v0.t
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: vs8r.v v24, (a0)
; RV32-NEXT: vs8r.v v16, (a1)
; RV64-LABEL: mgather_nxv16i64:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT: vl8re64.v v24, (a0)
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV64-NEXT: vmv8r.v v16, v8
; RV64-NEXT: vl8re64.v v8, (a1)
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: srli a1, a0, 3
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vluxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add a0, a2, a0
; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: vs8r.v v24, (a2)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
  %p0 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> undef, <vscale x 8 x ptr> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> %p0, <vscale x 8 x ptr> %ptrs1, i64 8)
  %pt0 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %passthru0, i64 0)
  %pt1 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %pt0, <vscale x 8 x i64> %passthru1, i64 8)
  %v = call <vscale x 16 x i64> @llvm.masked.gather.nxv16i64.nxv16p0(<vscale x 16 x ptr> %p1, i32 8, <vscale x 16 x i1> %m, <vscale x 16 x i64> %pt1)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
declare <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x half>)
define <vscale x 1 x half> @mgather_nxv1f16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x half> %passthru) {
; RV32-LABEL: mgather_nxv1f16:
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64-LABEL: mgather_nxv1f16:
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v9
  %v = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m, <vscale x 1 x half> %passthru)
  ret <vscale x 1 x half> %v
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
define <vscale x 2 x half> @mgather_nxv2f16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x half> %passthru) {
; RV32-LABEL: mgather_nxv2f16:
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64-LABEL: mgather_nxv2f16:
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v8, v10
  %v = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x half> %passthru)
  ret <vscale x 2 x half> %v
declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
define <vscale x 4 x half> @mgather_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x half> %passthru) {
; RV32-LABEL: mgather_nxv4f16:
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_nxv4f16:
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
  %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m, <vscale x 4 x half> %passthru)
  ret <vscale x 4 x half> %v
define <vscale x 4 x half> @mgather_truemask_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x half> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4f16:
; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vluxei32.v v10, (zero), v8
; RV32-NEXT: vmv.v.v v8, v10
; RV64-LABEL: mgather_truemask_nxv4f16:
; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mtrue, <vscale x 4 x half> %passthru)
  ret <vscale x 4 x half> %v
define <vscale x 4 x half> @mgather_falsemask_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x half> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4f16:
; RV32-NEXT: vmv1r.v v8, v10
; RV64-LABEL: mgather_falsemask_nxv4f16:
; RV64-NEXT: vmv1r.v v8, v12
  %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x half> %passthru)
  ret <vscale x 4 x half> %v
declare <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x half>)

define <vscale x 8 x half> @mgather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
; RV32-LABEL: mgather_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @mgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @mgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @mgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT:    vluxei16.v v10, (a0), v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
  ret <vscale x 8 x half> %v
}

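; Note (editorial comment, not an autogenerated assertion): with zero-extended
; i8 indices the index vector can stay at EEW=16 (vwaddu.vv scales by the
; element size of half), so a single vluxei16-based CHECK block covers both
; RV32 and RV64 in the function above.
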
define <vscale x 8 x half> @mgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; RV32-NEXT:    vwadd.vv v12, v8, v8
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
  ret <vscale x 8 x half> %v
}

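; Masked gathers of scalable f32 vectors. As in the f16 cases above, RV32
; indexes the gather with vluxei32 and RV64 with vluxei64, merging the loaded
; lanes into the passthru operand under the mask.
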
declare <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x float>)

define <vscale x 1 x float> @mgather_nxv1f32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x float> %passthru) {
; RV32-LABEL: mgather_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m, <vscale x 1 x float> %passthru)
  ret <vscale x 1 x float> %v
}

declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)

define <vscale x 2 x float> @mgather_nxv2f32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x float> %passthru) {
; RV32-LABEL: mgather_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x float> %passthru)
  ret <vscale x 2 x float> %v
}

declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)

define <vscale x 4 x float> @mgather_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x float> %passthru) {
; RV32-LABEL: mgather_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x float> %passthru)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @mgather_truemask_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x float> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mtrue, <vscale x 4 x float> %passthru)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @mgather_falsemask_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x float> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_falsemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv2r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %passthru)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)

define <vscale x 8 x float> @mgather_nxv8f32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @mgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vsll.vi v8, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vluxei16.v v12, (a0), v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; CHECK-NEXT:    vzext.vf2 v16, v8
; CHECK-NEXT:    vsll.vi v8, v16, 2
; CHECK-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @mgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
  ret <vscale x 8 x float> %v
}

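; Masked gathers of scalable f64 vectors. The baseidx variants below exercise
; i8, i16, and i32 index types with sign and zero extension; zero-extended i8
; and i16 indices allow a narrower index EEW, so those checks are shared
; between RV32 and RV64 under the CHECK prefix.
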
declare <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x double>)

define <vscale x 1 x double> @mgather_nxv1f64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru) {
; RV32-LABEL: mgather_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)

define <vscale x 2 x double> @mgather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru) {
; RV32-LABEL: mgather_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)

define <vscale x 4 x double> @mgather_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru) {
; RV32-LABEL: mgather_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x double> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %mtrue, <vscale x 4 x double> %passthru)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @mgather_falsemask_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x double> %passthru) {
; CHECK-LABEL: mgather_falsemask_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x double> %passthru)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x double>)

define <vscale x 8 x double> @mgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vsll.vi v8, v10, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vluxei16.v v16, (a0), v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v12, v8
; CHECK-NEXT:    vsll.vi v8, v12, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v8, 0
; RV32-NEXT:    vsll.vi v8, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

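; Wide i8 gathers (nxv16i8 and nxv32i8). When the indices extended to pointer
; width no longer fit in a single LMUL=8 register group, the gather is split
; into pieces and the mask for each piece is obtained with vslidedown.vx;
; nxv16i8 only needs the split on RV64, while nxv32i8 is split on both targets.
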
declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)

define <vscale x 16 x i8> @mgather_baseidx_nxv16i8(ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv16i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv16i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 16 x i8> %idxs
  %v = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> %ptrs, i32 2, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru)
  ret <vscale x 16 x i8> %v
}

declare <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr>, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)

define <vscale x 32 x i8> @mgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv32i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 2
; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v10
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; RV32-NEXT:    vluxei32.v v14, (a0), v16, v0.t
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv32i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv1r.v v16, v0
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a2, a1, 3
; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a2
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v13, (a0), v24, v0.t
; RV64-NEXT:    srli a1, a1, 2
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v16, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v14, (a0), v16, v0.t
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a2
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v11
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v15, (a0), v16, v0.t
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
  %v = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, i32 2, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru)
  ret <vscale x 32 x i8> %v
}