1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
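; Fixed-length llvm.vp.gather tests. RV32 lowers the pointer/offset vectors to
; 32-bit indices (vluxei32) and RV64 to 64-bit indices (vluxei64); in both
; cases the EVL argument (a0/a1) becomes the AVL of the vsetvli guarding the
; indexed load.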
7 declare <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr>, <2 x i1>, i32)
9 define <2 x i8> @vpgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
10 ; RV32-LABEL: vpgather_v2i8:
12 ; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
13 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
14 ; RV32-NEXT: vmv1r.v v8, v9
17 ; RV64-LABEL: vpgather_v2i8:
19 ; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
20 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
21 ; RV64-NEXT: vmv1r.v v8, v9
23 %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
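; The sextload/zextload variants below gather at the narrow element type and
; then widen the result with vsext.vf*/vzext.vf* under a separate vsetivli,
; rather than gathering at the wider type.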
27 define <2 x i16> @vpgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
28 ; RV32-LABEL: vpgather_v2i8_sextload_v2i16:
30 ; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
31 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
32 ; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
33 ; RV32-NEXT: vsext.vf2 v8, v9
36 ; RV64-LABEL: vpgather_v2i8_sextload_v2i16:
38 ; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
39 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
40 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
41 ; RV64-NEXT: vsext.vf2 v8, v9
43 %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
44 %ev = sext <2 x i8> %v to <2 x i16>
48 define <2 x i16> @vpgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
49 ; RV32-LABEL: vpgather_v2i8_zextload_v2i16:
51 ; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
52 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
53 ; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
54 ; RV32-NEXT: vzext.vf2 v8, v9
57 ; RV64-LABEL: vpgather_v2i8_zextload_v2i16:
59 ; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
60 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
61 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
62 ; RV64-NEXT: vzext.vf2 v8, v9
64 %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
65 %ev = zext <2 x i8> %v to <2 x i16>
69 define <2 x i32> @vpgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
70 ; RV32-LABEL: vpgather_v2i8_sextload_v2i32:
72 ; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
73 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
74 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
75 ; RV32-NEXT: vsext.vf4 v8, v9
78 ; RV64-LABEL: vpgather_v2i8_sextload_v2i32:
80 ; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
81 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
82 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
83 ; RV64-NEXT: vsext.vf4 v8, v9
85 %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
86 %ev = sext <2 x i8> %v to <2 x i32>
90 define <2 x i32> @vpgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
91 ; RV32-LABEL: vpgather_v2i8_zextload_v2i32:
93 ; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
94 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
95 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
96 ; RV32-NEXT: vzext.vf4 v8, v9
99 ; RV64-LABEL: vpgather_v2i8_zextload_v2i32:
101 ; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
102 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
103 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
104 ; RV64-NEXT: vzext.vf4 v8, v9
106 %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
107 %ev = zext <2 x i8> %v to <2 x i32>
111 define <2 x i64> @vpgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
112 ; RV32-LABEL: vpgather_v2i8_sextload_v2i64:
114 ; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
115 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
116 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
117 ; RV32-NEXT: vsext.vf8 v8, v9
120 ; RV64-LABEL: vpgather_v2i8_sextload_v2i64:
122 ; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
123 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
124 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
125 ; RV64-NEXT: vsext.vf8 v8, v9
127 %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
128 %ev = sext <2 x i8> %v to <2 x i64>
132 define <2 x i64> @vpgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
133 ; RV32-LABEL: vpgather_v2i8_zextload_v2i64:
135 ; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
136 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
137 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
138 ; RV32-NEXT: vzext.vf8 v8, v9
141 ; RV64-LABEL: vpgather_v2i8_zextload_v2i64:
143 ; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
144 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
145 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
146 ; RV64-NEXT: vzext.vf8 v8, v9
148 %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
149 %ev = zext <2 x i8> %v to <2 x i64>
153 declare <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr>, <3 x i1>, i32)
155 define <3 x i8> @vpgather_v3i8(<3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
156 ; RV32-LABEL: vpgather_v3i8:
158 ; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
159 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
160 ; RV32-NEXT: vmv1r.v v8, v9
163 ; RV64-LABEL: vpgather_v3i8:
165 ; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
166 ; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
167 ; RV64-NEXT: vmv1r.v v8, v10
169 %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
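; truemask tests: a mask built by splatting i1 1 is treated as all-true, so
; the gather is emitted without a v0.t mask operand.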
173 define <3 x i8> @vpgather_truemask_v3i8(<3 x ptr> %ptrs, i32 zeroext %evl) {
174 ; RV32-LABEL: vpgather_truemask_v3i8:
176 ; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
177 ; RV32-NEXT: vluxei32.v v9, (zero), v8
178 ; RV32-NEXT: vmv1r.v v8, v9
181 ; RV64-LABEL: vpgather_truemask_v3i8:
183 ; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
184 ; RV64-NEXT: vluxei64.v v10, (zero), v8
185 ; RV64-NEXT: vmv1r.v v8, v10
187 %mhead = insertelement <3 x i1> poison, i1 1, i32 0
188 %mtrue = shufflevector <3 x i1> %mhead, <3 x i1> poison, <3 x i32> zeroinitializer
189 %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> %mtrue, i32 %evl)
193 declare <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr>, <4 x i1>, i32)
195 define <4 x i8> @vpgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
196 ; RV32-LABEL: vpgather_v4i8:
198 ; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
199 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
200 ; RV32-NEXT: vmv1r.v v8, v9
203 ; RV64-LABEL: vpgather_v4i8:
205 ; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
206 ; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
207 ; RV64-NEXT: vmv1r.v v8, v10
209 %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
213 define <4 x i8> @vpgather_truemask_v4i8(<4 x ptr> %ptrs, i32 zeroext %evl) {
214 ; RV32-LABEL: vpgather_truemask_v4i8:
216 ; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
217 ; RV32-NEXT: vluxei32.v v9, (zero), v8
218 ; RV32-NEXT: vmv1r.v v8, v9
221 ; RV64-LABEL: vpgather_truemask_v4i8:
223 ; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
224 ; RV64-NEXT: vluxei64.v v10, (zero), v8
225 ; RV64-NEXT: vmv1r.v v8, v10
227 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
228 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
229 %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
233 declare <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr>, <8 x i1>, i32)
235 define <8 x i8> @vpgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
236 ; RV32-LABEL: vpgather_v8i8:
238 ; RV32-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
239 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
240 ; RV32-NEXT: vmv1r.v v8, v10
243 ; RV64-LABEL: vpgather_v8i8:
245 ; RV64-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
246 ; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
247 ; RV64-NEXT: vmv1r.v v8, v12
249 %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
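; baseidx tests: the gather goes through a GEP of a scalar base plus a vector
; of indices. The indices are extended to pointer width (sign-extended here),
; scaled by the element size, and used as byte offsets from (a0) in the
; indexed load.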
253 define <8 x i8> @vpgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
254 ; RV32-LABEL: vpgather_baseidx_v8i8:
256 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
257 ; RV32-NEXT: vsext.vf4 v10, v8
258 ; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
259 ; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t
262 ; RV64-LABEL: vpgather_baseidx_v8i8:
264 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
265 ; RV64-NEXT: vsext.vf8 v12, v8
266 ; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
267 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
269 %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
270 %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
274 declare <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr>, <32 x i1>, i32)
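; With 32 i8 indices, the RV64 lowering needs <32 x i64> offsets, which do not
; fit in a single register group, so the gather is split: the first half uses
; min(EVL, 16) elements, the second half the remaining max(EVL - 16, 0)
; (computed with the sltu/addi sequence), the index vector is slid down by 16
; elements (and the mask by the corresponding 2 bytes), and the two halves are
; recombined with vslideup.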
276 define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
277 ; RV32-LABEL: vpgather_baseidx_v32i8:
279 ; RV32-NEXT: li a2, 32
280 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
281 ; RV32-NEXT: vsext.vf4 v16, v8
282 ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
283 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
286 ; RV64-LABEL: vpgather_baseidx_v32i8:
288 ; RV64-NEXT: li a3, 16
289 ; RV64-NEXT: mv a2, a1
290 ; RV64-NEXT: bltu a1, a3, .LBB13_2
291 ; RV64-NEXT: # %bb.1:
292 ; RV64-NEXT: li a2, 16
293 ; RV64-NEXT: .LBB13_2:
294 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
295 ; RV64-NEXT: vsext.vf8 v16, v8
296 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma
297 ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
298 ; RV64-NEXT: addi a2, a1, -16
299 ; RV64-NEXT: sltu a1, a1, a2
300 ; RV64-NEXT: addi a1, a1, -1
301 ; RV64-NEXT: and a1, a1, a2
302 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
303 ; RV64-NEXT: vslidedown.vi v8, v8, 16
304 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
305 ; RV64-NEXT: vsext.vf8 v16, v8
306 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
307 ; RV64-NEXT: vslidedown.vi v0, v0, 2
308 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
309 ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
310 ; RV64-NEXT: li a0, 32
311 ; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma
312 ; RV64-NEXT: vslideup.vi v10, v8, 16
313 ; RV64-NEXT: vmv.v.v v8, v10
315 %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
316 %v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
320 declare <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr>, <2 x i1>, i32)
322 define <2 x i16> @vpgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
323 ; RV32-LABEL: vpgather_v2i16:
325 ; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
326 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
327 ; RV32-NEXT: vmv1r.v v8, v9
330 ; RV64-LABEL: vpgather_v2i16:
332 ; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
333 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
334 ; RV64-NEXT: vmv1r.v v8, v9
336 %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
340 define <2 x i32> @vpgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
341 ; RV32-LABEL: vpgather_v2i16_sextload_v2i32:
343 ; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
344 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
345 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
346 ; RV32-NEXT: vsext.vf2 v8, v9
349 ; RV64-LABEL: vpgather_v2i16_sextload_v2i32:
351 ; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
352 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
353 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
354 ; RV64-NEXT: vsext.vf2 v8, v9
356 %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
357 %ev = sext <2 x i16> %v to <2 x i32>
361 define <2 x i32> @vpgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
362 ; RV32-LABEL: vpgather_v2i16_zextload_v2i32:
364 ; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
365 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
366 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
367 ; RV32-NEXT: vzext.vf2 v8, v9
370 ; RV64-LABEL: vpgather_v2i16_zextload_v2i32:
372 ; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
373 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
374 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
375 ; RV64-NEXT: vzext.vf2 v8, v9
377 %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
378 %ev = zext <2 x i16> %v to <2 x i32>
382 define <2 x i64> @vpgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
383 ; RV32-LABEL: vpgather_v2i16_sextload_v2i64:
385 ; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
386 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
387 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
388 ; RV32-NEXT: vsext.vf4 v8, v9
391 ; RV64-LABEL: vpgather_v2i16_sextload_v2i64:
393 ; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
394 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
395 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
396 ; RV64-NEXT: vsext.vf4 v8, v9
398 %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
399 %ev = sext <2 x i16> %v to <2 x i64>
403 define <2 x i64> @vpgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
404 ; RV32-LABEL: vpgather_v2i16_zextload_v2i64:
406 ; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
407 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
408 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
409 ; RV32-NEXT: vzext.vf4 v8, v9
412 ; RV64-LABEL: vpgather_v2i16_zextload_v2i64:
414 ; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
415 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
416 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
417 ; RV64-NEXT: vzext.vf4 v8, v9
419 %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
420 %ev = zext <2 x i16> %v to <2 x i64>
424 declare <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr>, <4 x i1>, i32)
426 define <4 x i16> @vpgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
427 ; RV32-LABEL: vpgather_v4i16:
429 ; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
430 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
431 ; RV32-NEXT: vmv1r.v v8, v9
434 ; RV64-LABEL: vpgather_v4i16:
436 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
437 ; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
438 ; RV64-NEXT: vmv1r.v v8, v10
440 %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
444 define <4 x i16> @vpgather_truemask_v4i16(<4 x ptr> %ptrs, i32 zeroext %evl) {
445 ; RV32-LABEL: vpgather_truemask_v4i16:
447 ; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
448 ; RV32-NEXT: vluxei32.v v9, (zero), v8
449 ; RV32-NEXT: vmv1r.v v8, v9
452 ; RV64-LABEL: vpgather_truemask_v4i16:
454 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
455 ; RV64-NEXT: vluxei64.v v10, (zero), v8
456 ; RV64-NEXT: vmv1r.v v8, v10
458 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
459 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
460 %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
464 declare <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr>, <8 x i1>, i32)
466 define <8 x i16> @vpgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
467 ; RV32-LABEL: vpgather_v8i16:
469 ; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
470 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
471 ; RV32-NEXT: vmv.v.v v8, v10
474 ; RV64-LABEL: vpgather_v8i16:
476 ; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
477 ; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
478 ; RV64-NEXT: vmv.v.v v8, v12
480 %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
484 define <8 x i16> @vpgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
485 ; RV32-LABEL: vpgather_baseidx_v8i8_v8i16:
487 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
488 ; RV32-NEXT: vsext.vf4 v10, v8
489 ; RV32-NEXT: vadd.vv v10, v10, v10
490 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
491 ; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t
494 ; RV64-LABEL: vpgather_baseidx_v8i8_v8i16:
496 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
497 ; RV64-NEXT: vsext.vf8 v12, v8
498 ; RV64-NEXT: vadd.vv v12, v12, v12
499 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
500 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
502 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
503 %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
507 define <8 x i16> @vpgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
508 ; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
510 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
511 ; RV32-NEXT: vsext.vf4 v10, v8
512 ; RV32-NEXT: vadd.vv v10, v10, v10
513 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
514 ; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t
517 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
519 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
520 ; RV64-NEXT: vsext.vf8 v12, v8
521 ; RV64-NEXT: vadd.vv v12, v12, v12
522 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
523 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
525 %eidxs = sext <8 x i8> %idxs to <8 x i16>
526 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
527 %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
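; Zero-extended i8 indices are known to fit in 16 bits, so the scaled offsets
; are formed with a widening vwaddu.vv and the gather uses vluxei16 on both
; RV32 and RV64, instead of widening the indices to XLEN.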
531 define <8 x i16> @vpgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
532 ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i16:
534 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
535 ; RV32-NEXT: vwaddu.vv v9, v8, v8
536 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
537 ; RV32-NEXT: vluxei16.v v8, (a0), v9, v0.t
540 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i16:
542 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
543 ; RV64-NEXT: vwaddu.vv v9, v8, v8
544 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
545 ; RV64-NEXT: vluxei16.v v8, (a0), v9, v0.t
547 %eidxs = zext <8 x i8> %idxs to <8 x i16>
548 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
549 %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
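; Native i16 indices: RV32 doubles and sign-extends them in a single widening
; vwadd.vv and gathers with vluxei32; RV64 first sign-extends them to i64.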
553 define <8 x i16> @vpgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
554 ; RV32-LABEL: vpgather_baseidx_v8i16:
556 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
557 ; RV32-NEXT: vwadd.vv v10, v8, v8
558 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
559 ; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t
562 ; RV64-LABEL: vpgather_baseidx_v8i16:
564 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
565 ; RV64-NEXT: vsext.vf4 v12, v8
566 ; RV64-NEXT: vadd.vv v12, v12, v12
567 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
568 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
570 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
571 %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
575 declare <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr>, <2 x i1>, i32)
577 define <2 x i32> @vpgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
578 ; RV32-LABEL: vpgather_v2i32:
580 ; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
581 ; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
584 ; RV64-LABEL: vpgather_v2i32:
586 ; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
587 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
588 ; RV64-NEXT: vmv1r.v v8, v9
590 %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
594 define <2 x i64> @vpgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
595 ; RV32-LABEL: vpgather_v2i32_sextload_v2i64:
597 ; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
598 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
599 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
600 ; RV32-NEXT: vsext.vf2 v8, v9
603 ; RV64-LABEL: vpgather_v2i32_sextload_v2i64:
605 ; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
606 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
607 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
608 ; RV64-NEXT: vsext.vf2 v8, v9
610 %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
611 %ev = sext <2 x i32> %v to <2 x i64>
615 define <2 x i64> @vpgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
616 ; RV32-LABEL: vpgather_v2i32_zextload_v2i64:
618 ; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
619 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
620 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
621 ; RV32-NEXT: vzext.vf2 v8, v9
624 ; RV64-LABEL: vpgather_v2i32_zextload_v2i64:
626 ; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
627 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
628 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
629 ; RV64-NEXT: vzext.vf2 v8, v9
631 %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
632 %ev = zext <2 x i32> %v to <2 x i64>
636 declare <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32)
638 define <4 x i32> @vpgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
639 ; RV32-LABEL: vpgather_v4i32:
641 ; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
642 ; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
645 ; RV64-LABEL: vpgather_v4i32:
647 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
648 ; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
649 ; RV64-NEXT: vmv.v.v v8, v10
651 %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
655 define <4 x i32> @vpgather_truemask_v4i32(<4 x ptr> %ptrs, i32 zeroext %evl) {
656 ; RV32-LABEL: vpgather_truemask_v4i32:
658 ; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
659 ; RV32-NEXT: vluxei32.v v8, (zero), v8
662 ; RV64-LABEL: vpgather_truemask_v4i32:
664 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
665 ; RV64-NEXT: vluxei64.v v10, (zero), v8
666 ; RV64-NEXT: vmv.v.v v8, v10
668 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
669 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
670 %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
674 declare <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr>, <8 x i1>, i32)
676 define <8 x i32> @vpgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
677 ; RV32-LABEL: vpgather_v8i32:
679 ; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
680 ; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
683 ; RV64-LABEL: vpgather_v8i32:
685 ; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
686 ; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
687 ; RV64-NEXT: vmv.v.v v8, v12
689 %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
693 define <8 x i32> @vpgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
694 ; RV32-LABEL: vpgather_baseidx_v8i8_v8i32:
696 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
697 ; RV32-NEXT: vsext.vf4 v10, v8
698 ; RV32-NEXT: vsll.vi v8, v10, 2
699 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
700 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
703 ; RV64-LABEL: vpgather_baseidx_v8i8_v8i32:
705 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
706 ; RV64-NEXT: vsext.vf8 v12, v8
707 ; RV64-NEXT: vsll.vi v12, v12, 2
708 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
709 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
711 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
712 %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
716 define <8 x i32> @vpgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
717 ; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i32:
719 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
720 ; RV32-NEXT: vsext.vf4 v10, v8
721 ; RV32-NEXT: vsll.vi v8, v10, 2
722 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
723 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
726 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i32:
728 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
729 ; RV64-NEXT: vsext.vf8 v12, v8
730 ; RV64-NEXT: vsll.vi v12, v12, 2
731 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
732 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
734 %eidxs = sext <8 x i8> %idxs to <8 x i32>
735 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
736 %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
740 define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
741 ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
743 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
744 ; RV32-NEXT: vzext.vf2 v9, v8
745 ; RV32-NEXT: vsll.vi v10, v9, 2
746 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
747 ; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t
750 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
752 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
753 ; RV64-NEXT: vzext.vf2 v9, v8
754 ; RV64-NEXT: vsll.vi v10, v9, 2
755 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
756 ; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t
758 %eidxs = zext <8 x i8> %idxs to <8 x i32>
759 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
760 %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
764 define <8 x i32> @vpgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
765 ; RV32-LABEL: vpgather_baseidx_v8i16_v8i32:
767 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
768 ; RV32-NEXT: vsext.vf2 v10, v8
769 ; RV32-NEXT: vsll.vi v8, v10, 2
770 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
771 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
774 ; RV64-LABEL: vpgather_baseidx_v8i16_v8i32:
776 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
777 ; RV64-NEXT: vsext.vf4 v12, v8
778 ; RV64-NEXT: vsll.vi v12, v12, 2
779 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
780 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
782 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
783 %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
787 define <8 x i32> @vpgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
788 ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i32:
790 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
791 ; RV32-NEXT: vsext.vf2 v10, v8
792 ; RV32-NEXT: vsll.vi v8, v10, 2
793 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
794 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
797 ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i32:
799 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
800 ; RV64-NEXT: vsext.vf4 v12, v8
801 ; RV64-NEXT: vsll.vi v12, v12, 2
802 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
803 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
805 %eidxs = sext <8 x i16> %idxs to <8 x i32>
806 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
807 %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
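; As with the i8 case, zero-extended i16 indices keep the offsets within 32
; bits, so RV64 can also use vluxei32 here.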
811 define <8 x i32> @vpgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
812 ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i32:
814 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
815 ; RV32-NEXT: vzext.vf2 v10, v8
816 ; RV32-NEXT: vsll.vi v8, v10, 2
817 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
818 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
821 ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i32:
823 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
824 ; RV64-NEXT: vzext.vf2 v10, v8
825 ; RV64-NEXT: vsll.vi v8, v10, 2
826 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
827 ; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t
829 %eidxs = zext <8 x i16> %idxs to <8 x i32>
830 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
831 %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
835 define <8 x i32> @vpgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
836 ; RV32-LABEL: vpgather_baseidx_v8i32:
838 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
839 ; RV32-NEXT: vsll.vi v8, v8, 2
840 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
841 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
844 ; RV64-LABEL: vpgather_baseidx_v8i32:
846 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
847 ; RV64-NEXT: vsext.vf2 v12, v8
848 ; RV64-NEXT: vsll.vi v12, v12, 2
849 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
850 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
852 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
853 %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
857 declare <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr>, <2 x i1>, i32)
859 define <2 x i64> @vpgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
860 ; RV32-LABEL: vpgather_v2i64:
862 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
863 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
864 ; RV32-NEXT: vmv.v.v v8, v9
867 ; RV64-LABEL: vpgather_v2i64:
869 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
870 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
872 %v = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
876 declare <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr>, <4 x i1>, i32)
878 define <4 x i64> @vpgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
879 ; RV32-LABEL: vpgather_v4i64:
881 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
882 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
883 ; RV32-NEXT: vmv.v.v v8, v10
886 ; RV64-LABEL: vpgather_v4i64:
888 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
889 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
891 %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
895 define <4 x i64> @vpgather_truemask_v4i64(<4 x ptr> %ptrs, i32 zeroext %evl) {
896 ; RV32-LABEL: vpgather_truemask_v4i64:
898 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
899 ; RV32-NEXT: vluxei32.v v10, (zero), v8
900 ; RV32-NEXT: vmv.v.v v8, v10
903 ; RV64-LABEL: vpgather_truemask_v4i64:
905 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
906 ; RV64-NEXT: vluxei64.v v8, (zero), v8
908 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
909 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
910 %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
914 declare <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr>, <8 x i1>, i32)
916 define <8 x i64> @vpgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
917 ; RV32-LABEL: vpgather_v8i64:
919 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
920 ; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
921 ; RV32-NEXT: vmv.v.v v8, v12
924 ; RV64-LABEL: vpgather_v8i64:
926 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
927 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
929 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
933 define <8 x i64> @vpgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
934 ; RV32-LABEL: vpgather_baseidx_v8i8_v8i64:
936 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
937 ; RV32-NEXT: vsext.vf4 v10, v8
938 ; RV32-NEXT: vsll.vi v12, v10, 3
939 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
940 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
943 ; RV64-LABEL: vpgather_baseidx_v8i8_v8i64:
945 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
946 ; RV64-NEXT: vsext.vf8 v12, v8
947 ; RV64-NEXT: vsll.vi v8, v12, 3
948 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
949 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
951 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
952 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
956 define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
957 ; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
959 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
960 ; RV32-NEXT: vsext.vf4 v10, v8
961 ; RV32-NEXT: vsll.vi v12, v10, 3
962 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
963 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
966 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
968 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
969 ; RV64-NEXT: vsext.vf8 v12, v8
970 ; RV64-NEXT: vsll.vi v8, v12, 3
971 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
972 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
974 %eidxs = sext <8 x i8> %idxs to <8 x i64>
975 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
976 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
980 define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
981 ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
983 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
984 ; RV32-NEXT: vzext.vf2 v9, v8
985 ; RV32-NEXT: vsll.vi v12, v9, 3
986 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
987 ; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t
990 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
992 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
993 ; RV64-NEXT: vzext.vf2 v9, v8
994 ; RV64-NEXT: vsll.vi v12, v9, 3
995 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
996 ; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t
998 %eidxs = zext <8 x i8> %idxs to <8 x i64>
999 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
1000 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1004 define <8 x i64> @vpgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1005 ; RV32-LABEL: vpgather_baseidx_v8i16_v8i64:
1007 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1008 ; RV32-NEXT: vsext.vf2 v10, v8
1009 ; RV32-NEXT: vsll.vi v12, v10, 3
1010 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1011 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1014 ; RV64-LABEL: vpgather_baseidx_v8i16_v8i64:
1016 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1017 ; RV64-NEXT: vsext.vf4 v12, v8
1018 ; RV64-NEXT: vsll.vi v8, v12, 3
1019 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1020 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1022 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
1023 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1027 define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1028 ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
1030 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1031 ; RV32-NEXT: vsext.vf2 v10, v8
1032 ; RV32-NEXT: vsll.vi v12, v10, 3
1033 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1034 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1037 ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
1039 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1040 ; RV64-NEXT: vsext.vf4 v12, v8
1041 ; RV64-NEXT: vsll.vi v8, v12, 3
1042 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1043 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1045 %eidxs = sext <8 x i16> %idxs to <8 x i64>
1046 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
1047 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1051 define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1052 ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
1054 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1055 ; RV32-NEXT: vzext.vf2 v10, v8
1056 ; RV32-NEXT: vsll.vi v12, v10, 3
1057 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1058 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1061 ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
1063 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1064 ; RV64-NEXT: vzext.vf2 v10, v8
1065 ; RV64-NEXT: vsll.vi v12, v10, 3
1066 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1067 ; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t
1069 %eidxs = zext <8 x i16> %idxs to <8 x i64>
1070 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
1071 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1075 define <8 x i64> @vpgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1076 ; RV32-LABEL: vpgather_baseidx_v8i32_v8i64:
1078 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1079 ; RV32-NEXT: vsll.vi v12, v8, 3
1080 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1081 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1084 ; RV64-LABEL: vpgather_baseidx_v8i32_v8i64:
1086 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1087 ; RV64-NEXT: vsext.vf2 v12, v8
1088 ; RV64-NEXT: vsll.vi v8, v12, 3
1089 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1090 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1092 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
1093 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1097 define <8 x i64> @vpgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1098 ; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
1100 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1101 ; RV32-NEXT: vsll.vi v12, v8, 3
1102 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1103 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1106 ; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
1108 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1109 ; RV64-NEXT: vsext.vf2 v12, v8
1110 ; RV64-NEXT: vsll.vi v8, v12, 3
1111 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1112 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1114 %eidxs = sext <8 x i32> %idxs to <8 x i64>
1115 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
1116 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1120 define <8 x i64> @vpgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1121 ; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
1123 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1124 ; RV32-NEXT: vsll.vi v12, v8, 3
1125 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1126 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1129 ; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
1131 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1132 ; RV64-NEXT: vzext.vf2 v12, v8
1133 ; RV64-NEXT: vsll.vi v8, v12, 3
1134 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1135 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1137 %eidxs = zext <8 x i32> %idxs to <8 x i64>
1138 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
1139 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
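; Native i64 indices on RV32 are truncated to 32 bits with vnsrl.wi before
; scaling, since the 32-bit target only needs 32-bit byte offsets.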
1143 define <8 x i64> @vpgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1144 ; RV32-LABEL: vpgather_baseidx_v8i64:
1146 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1147 ; RV32-NEXT: vnsrl.wi v12, v8, 0
1148 ; RV32-NEXT: vsll.vi v12, v12, 3
1149 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1150 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1153 ; RV64-LABEL: vpgather_baseidx_v8i64:
1155 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1156 ; RV64-NEXT: vsll.vi v8, v8, 3
1157 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1158 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1160 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
1161 %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
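; The half/float/double gathers below mirror the integer patterns above; only
; the element width of the load differs.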
1165 declare <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr>, <2 x i1>, i32)
1167 define <2 x half> @vpgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1168 ; RV32-LABEL: vpgather_v2f16:
1170 ; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
1171 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
1172 ; RV32-NEXT: vmv1r.v v8, v9
1175 ; RV64-LABEL: vpgather_v2f16:
1177 ; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
1178 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
1179 ; RV64-NEXT: vmv1r.v v8, v9
1181 %v = call <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1185 declare <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr>, <4 x i1>, i32)
1187 define <4 x half> @vpgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1188 ; RV32-LABEL: vpgather_v4f16:
1190 ; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1191 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
1192 ; RV32-NEXT: vmv1r.v v8, v9
1195 ; RV64-LABEL: vpgather_v4f16:
1197 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1198 ; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
1199 ; RV64-NEXT: vmv1r.v v8, v10
1201 %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1205 define <4 x half> @vpgather_truemask_v4f16(<4 x ptr> %ptrs, i32 zeroext %evl) {
1206 ; RV32-LABEL: vpgather_truemask_v4f16:
1208 ; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1209 ; RV32-NEXT: vluxei32.v v9, (zero), v8
1210 ; RV32-NEXT: vmv1r.v v8, v9
1213 ; RV64-LABEL: vpgather_truemask_v4f16:
1215 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1216 ; RV64-NEXT: vluxei64.v v10, (zero), v8
1217 ; RV64-NEXT: vmv1r.v v8, v10
1219 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
1220 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
1221 %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
1225 declare <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr>, <8 x i1>, i32)
1227 define <8 x half> @vpgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1228 ; RV32-LABEL: vpgather_v8f16:
1230 ; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
1231 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
1232 ; RV32-NEXT: vmv.v.v v8, v10
1235 ; RV64-LABEL: vpgather_v8f16:
1237 ; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
1238 ; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
1239 ; RV64-NEXT: vmv.v.v v8, v12
1241 %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1245 define <8 x half> @vpgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1246 ; RV32-LABEL: vpgather_baseidx_v8i8_v8f16:
1248 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1249 ; RV32-NEXT: vsext.vf4 v10, v8
1250 ; RV32-NEXT: vadd.vv v10, v10, v10
1251 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1252 ; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t
1255 ; RV64-LABEL: vpgather_baseidx_v8i8_v8f16:
1257 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1258 ; RV64-NEXT: vsext.vf8 v12, v8
1259 ; RV64-NEXT: vadd.vv v12, v12, v12
1260 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1261 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1263 %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
1264 %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1268 define <8 x half> @vpgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1269 ; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
1271 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1272 ; RV32-NEXT: vsext.vf4 v10, v8
1273 ; RV32-NEXT: vadd.vv v10, v10, v10
1274 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1275 ; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t
1278 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
1280 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1281 ; RV64-NEXT: vsext.vf8 v12, v8
1282 ; RV64-NEXT: vadd.vv v12, v12, v12
1283 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1284 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1286 %eidxs = sext <8 x i8> %idxs to <8 x i16>
1287 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
1288 %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1292 define <8 x half> @vpgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1293 ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f16:
1295 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1296 ; RV32-NEXT: vwaddu.vv v9, v8, v8
1297 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1298 ; RV32-NEXT: vluxei16.v v8, (a0), v9, v0.t
1301 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f16:
1303 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1304 ; RV64-NEXT: vwaddu.vv v9, v8, v8
1305 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1306 ; RV64-NEXT: vluxei16.v v8, (a0), v9, v0.t
1308 %eidxs = zext <8 x i8> %idxs to <8 x i16>
1309 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
1310 %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1314 define <8 x half> @vpgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1315 ; RV32-LABEL: vpgather_baseidx_v8f16:
1317 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1318 ; RV32-NEXT: vwadd.vv v10, v8, v8
1319 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1320 ; RV32-NEXT: vluxei32.v v8, (a0), v10, v0.t
1323 ; RV64-LABEL: vpgather_baseidx_v8f16:
1325 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1326 ; RV64-NEXT: vsext.vf4 v12, v8
1327 ; RV64-NEXT: vadd.vv v12, v12, v12
1328 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1329 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1331 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
1332 %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1336 declare <2 x float> @llvm.vp.gather.v2f32.v2p0(<2 x ptr>, <2 x i1>, i32)
1338 define <2 x float> @vpgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1339 ; RV32-LABEL: vpgather_v2f32:
1341 ; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1342 ; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
1345 ; RV64-LABEL: vpgather_v2f32:
1347 ; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1348 ; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t
1349 ; RV64-NEXT: vmv1r.v v8, v9
1351 %v = call <2 x float> @llvm.vp.gather.v2f32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1355 declare <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr>, <4 x i1>, i32)
1357 define <4 x float> @vpgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1358 ; RV32-LABEL: vpgather_v4f32:
1360 ; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1361 ; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
1364 ; RV64-LABEL: vpgather_v4f32:
1366 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1367 ; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t
1368 ; RV64-NEXT: vmv.v.v v8, v10
1370 %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1374 define <4 x float> @vpgather_truemask_v4f32(<4 x ptr> %ptrs, i32 zeroext %evl) {
1375 ; RV32-LABEL: vpgather_truemask_v4f32:
1377 ; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1378 ; RV32-NEXT: vluxei32.v v8, (zero), v8
1381 ; RV64-LABEL: vpgather_truemask_v4f32:
1383 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1384 ; RV64-NEXT: vluxei64.v v10, (zero), v8
1385 ; RV64-NEXT: vmv.v.v v8, v10
1387 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
1388 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
1389 %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
1393 declare <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr>, <8 x i1>, i32)
1395 define <8 x float> @vpgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1396 ; RV32-LABEL: vpgather_v8f32:
1398 ; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1399 ; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t
1402 ; RV64-LABEL: vpgather_v8f32:
1404 ; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1405 ; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t
1406 ; RV64-NEXT: vmv.v.v v8, v12
1408 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1412 define <8 x float> @vpgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1413 ; RV32-LABEL: vpgather_baseidx_v8i8_v8f32:
1415 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1416 ; RV32-NEXT: vsext.vf4 v10, v8
1417 ; RV32-NEXT: vsll.vi v8, v10, 2
1418 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1419 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1422 ; RV64-LABEL: vpgather_baseidx_v8i8_v8f32:
1424 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1425 ; RV64-NEXT: vsext.vf8 v12, v8
1426 ; RV64-NEXT: vsll.vi v12, v12, 2
1427 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1428 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1430 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
1431 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1435 define <8 x float> @vpgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1436 ; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f32:
1438 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1439 ; RV32-NEXT: vsext.vf4 v10, v8
1440 ; RV32-NEXT: vsll.vi v8, v10, 2
1441 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1442 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1445 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f32:
1447 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1448 ; RV64-NEXT: vsext.vf8 v12, v8
1449 ; RV64-NEXT: vsll.vi v12, v12, 2
1450 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1451 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1453 %eidxs = sext <8 x i8> %idxs to <8 x i32>
1454 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1455 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1459 define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1460 ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f32:
1462 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1463 ; RV32-NEXT: vzext.vf2 v9, v8
1464 ; RV32-NEXT: vsll.vi v10, v9, 2
1465 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1466 ; RV32-NEXT: vluxei16.v v8, (a0), v10, v0.t
1469 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f32:
1471 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1472 ; RV64-NEXT: vzext.vf2 v9, v8
1473 ; RV64-NEXT: vsll.vi v10, v9, 2
1474 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1475 ; RV64-NEXT: vluxei16.v v8, (a0), v10, v0.t
1477 %eidxs = zext <8 x i8> %idxs to <8 x i32>
1478 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1479 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1483 define <8 x float> @vpgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1484 ; RV32-LABEL: vpgather_baseidx_v8i16_v8f32:
1486 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1487 ; RV32-NEXT: vsext.vf2 v10, v8
1488 ; RV32-NEXT: vsll.vi v8, v10, 2
1489 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1490 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1493 ; RV64-LABEL: vpgather_baseidx_v8i16_v8f32:
1495 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1496 ; RV64-NEXT: vsext.vf4 v12, v8
1497 ; RV64-NEXT: vsll.vi v12, v12, 2
1498 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1499 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1501 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
1502 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1506 define <8 x float> @vpgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1507 ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f32:
1509 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1510 ; RV32-NEXT: vsext.vf2 v10, v8
1511 ; RV32-NEXT: vsll.vi v8, v10, 2
1512 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1513 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1516 ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f32:
1518 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1519 ; RV64-NEXT: vsext.vf4 v12, v8
1520 ; RV64-NEXT: vsll.vi v12, v12, 2
1521 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1522 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1524 %eidxs = sext <8 x i16> %idxs to <8 x i32>
1525 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1526 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1530 define <8 x float> @vpgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1531 ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f32:
1533 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1534 ; RV32-NEXT: vzext.vf2 v10, v8
1535 ; RV32-NEXT: vsll.vi v8, v10, 2
1536 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1537 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1540 ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f32:
1542 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1543 ; RV64-NEXT: vzext.vf2 v10, v8
1544 ; RV64-NEXT: vsll.vi v8, v10, 2
1545 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1546 ; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t
1548 %eidxs = zext <8 x i16> %idxs to <8 x i32>
1549 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1550 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1554 define <8 x float> @vpgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1555 ; RV32-LABEL: vpgather_baseidx_v8f32:
1557 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1558 ; RV32-NEXT: vsll.vi v8, v8, 2
1559 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1560 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1563 ; RV64-LABEL: vpgather_baseidx_v8f32:
1565 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1566 ; RV64-NEXT: vsext.vf2 v12, v8
1567 ; RV64-NEXT: vsll.vi v12, v12, 2
1568 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1569 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1571 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
1572 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1576 declare <2 x double> @llvm.vp.gather.v2f64.v2p0(<2 x ptr>, <2 x i1>, i32)
1578 define <2 x double> @vpgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1579 ; RV32-LABEL: vpgather_v2f64:
1581 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1582 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
1583 ; RV32-NEXT: vmv.v.v v8, v9
1586 ; RV64-LABEL: vpgather_v2f64:
1588 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1589 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
1591 %v = call <2 x double> @llvm.vp.gather.v2f64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1595 declare <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr>, <4 x i1>, i32)
1597 define <4 x double> @vpgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1598 ; RV32-LABEL: vpgather_v4f64:
1600 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1601 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
1602 ; RV32-NEXT: vmv.v.v v8, v10
1605 ; RV64-LABEL: vpgather_v4f64:
1607 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1608 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
1610 %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1614 define <4 x double> @vpgather_truemask_v4f64(<4 x ptr> %ptrs, i32 zeroext %evl) {
1615 ; RV32-LABEL: vpgather_truemask_v4f64:
1617 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1618 ; RV32-NEXT: vluxei32.v v10, (zero), v8
1619 ; RV32-NEXT: vmv.v.v v8, v10
1622 ; RV64-LABEL: vpgather_truemask_v4f64:
1624 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1625 ; RV64-NEXT: vluxei64.v v8, (zero), v8
1627 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
1628 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
1629 %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
1633 declare <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr>, <8 x i1>, i32)
1635 define <8 x double> @vpgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1636 ; RV32-LABEL: vpgather_v8f64:
1638 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1639 ; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
1640 ; RV32-NEXT: vmv.v.v v8, v12
1643 ; RV64-LABEL: vpgather_v8f64:
1645 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1646 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
1648 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1652 define <8 x double> @vpgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1653 ; RV32-LABEL: vpgather_baseidx_v8i8_v8f64:
1655 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1656 ; RV32-NEXT: vsext.vf4 v10, v8
1657 ; RV32-NEXT: vsll.vi v12, v10, 3
1658 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1659 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1662 ; RV64-LABEL: vpgather_baseidx_v8i8_v8f64:
1664 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1665 ; RV64-NEXT: vsext.vf8 v12, v8
1666 ; RV64-NEXT: vsll.vi v8, v12, 3
1667 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1668 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1670 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
1671 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v12, v9, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v12, v9, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
define <8 x double> @vpgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v12, v10, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
define <8 x double> @vpgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
define <8 x double> @vpgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
define <8 x double> @vpgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
define <8 x double> @vpgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v8, 0
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}
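; A <32 x double> result does not fit in a single LMUL=8 register group at SEW=64
; for the minimum VLEN, so the gathers below are split in two: the EVL is clamped
; to 16 for the first half and adjusted (addi/sltu/and) for the second.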
declare <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr>, <32 x i1>, i32)
define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    mv a1, a0
; RV32-NEXT:    bltu a0, a2, .LBB86_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:    .LBB86_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
; RV32-NEXT:    addi a1, a0, -16
; RV32-NEXT:    sltu a0, a0, a1
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a1
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv8r.v v8, v24
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB86_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:    .LBB86_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    addi a1, a0, -16
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    and a0, a0, a1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v16, v0.t
; RV64-NEXT:    ret
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i8_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB87_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB87_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB87_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB87_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i8> %idxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB88_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB88_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB88_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB88_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <32 x i8> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB89_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB89_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e16, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei16.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v16, v12, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB89_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB89_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v16, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei16.v v16, (a0), v24, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <32 x i8> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i16_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB90_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB90_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v32i16_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB90_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB90_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i16> %idxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB91_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB91_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB91_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB91_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <32 x i16> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v8
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB92_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB92_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB92_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB92_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v24, v16, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <32 x i16> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB93_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB93_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB93_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB93_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB94_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB94_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB94_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB94_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <32 x i32> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a3, .LBB95_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    .LBB95_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v8
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB95_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB95_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <32 x i32> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vmv1r.v v1, v0
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vnsrl.wi v16, v8, 0
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vslideup.vi v16, v24, 16
; RV32-NEXT:    vsll.vi v24, v16, 3
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    addi a2, a1, -16
; RV32-NEXT:    sltu a3, a1, a2
; RV32-NEXT:    addi a3, a3, -1
; RV32-NEXT:    and a2, a3, a2
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    bltu a1, a2, .LBB96_2
; RV32-NEXT:    # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:    .LBB96_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vmv1r.v v0, v1
; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB96_2
; RV64-NEXT:    # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    .LBB96_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %idxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}