; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
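; Fixed-length @llvm.vp.gather tests: each gather is expected to select an
; indexed load (vluxei32 with 32-bit pointers on RV32, vluxei64 with 64-bit
; pointers on RV64), covering plain, sign/zero-extending, true-mask, and
; base+index forms for i8/i16/i32/i64, bfloat, half, float, and double
; element types.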
declare <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i8> @vpgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v2i8:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i16> @vpgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i16:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV64-LABEL: vpgather_v2i8_sextload_v2i16:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v9
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i16> @vpgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i16:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV64-LABEL: vpgather_v2i8_zextload_v2i16:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v9
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i32> @vpgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i32:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV64-LABEL: vpgather_v2i8_sextload_v2i32:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v9
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @vpgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i32:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV64-LABEL: vpgather_v2i8_zextload_v2i32:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v9
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @vpgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i64:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsext.vf8 v8, v9
; RV64-LABEL: vpgather_v2i8_sextload_v2i64:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vsext.vf8 v8, v9
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @vpgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i64:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vzext.vf8 v8, v9
; RV64-LABEL: vpgather_v2i8_zextload_v2i64:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vzext.vf8 v8, v9
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}
declare <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr>, <3 x i1>, i32)

define <3 x i8> @vpgather_v3i8(<3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v3i8:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v3i8:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
  ret <3 x i8> %v
}

define <3 x i8> @vpgather_truemask_v3i8(<3 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v3i8:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_truemask_v3i8:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl)
  ret <3 x i8> %v
}

declare <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i8> @vpgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i8:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v4i8:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vpgather_truemask_v4i8(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i8:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_truemask_v4i8:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i8> @vpgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i8:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV64-LABEL: vpgather_v8i8:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
  %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vpgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_v8i8:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}
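; With 32 elements, the RV64 lowering below splits the gather in two: the
; sign-extended 64-bit index vector for 16 elements already fills an LMUL=8
; register group, so the second half is gathered separately (with the index
; and mask slid down) and the results are recombined with vslideup.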
declare <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr>, <32 x i1>, i32)

define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i8:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV64-LABEL: vpgather_baseidx_v32i8:
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB13_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:  .LBB13_2:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 16
; RV64-NEXT:    vmv.v.v v8, v10
  %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
  %v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x i8> %v
}
declare <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i16> @vpgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v2i16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i32> @vpgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_sextload_v2i32:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV64-LABEL: vpgather_v2i16_sextload_v2i32:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v9
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @vpgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_zextload_v2i32:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV64-LABEL: vpgather_v2i16_zextload_v2i32:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v9
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @vpgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_sextload_v2i64:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV64-LABEL: vpgather_v2i16_sextload_v2i64:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v9
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @vpgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_zextload_v2i64:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV64-LABEL: vpgather_v2i16_zextload_v2i64:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v9
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}
declare <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i16> @vpgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v4i16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vpgather_truemask_v4i16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_truemask_v4i16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i16> @vpgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i16:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV64-LABEL: vpgather_v8i16:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}
define <8 x i16> @vpgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8i16:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_v8i8_v8i16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vpgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}
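; When the narrow indices are zero-extended, a narrower index EEW can be
; used: the i8 indices are widened and doubled in one step with vwaddu.vv,
; and the gather uses vluxei16 instead of an e32/e64 index vector.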
define <8 x i16> @vpgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i16:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v9, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i16:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v9, v8, v8
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vpgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_v8i16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}
declare <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i32> @vpgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i32:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV64-LABEL: vpgather_v2i32:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i64> @vpgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i32_sextload_v2i64:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV64-LABEL: vpgather_v2i32_sextload_v2i64:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v9
  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @vpgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i32_zextload_v2i64:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV64-LABEL: vpgather_v2i32_zextload_v2i64:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v9
  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i32> @vpgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i32:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV64-LABEL: vpgather_v4i32:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
  %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vpgather_truemask_v4i32(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i32:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV64-LABEL: vpgather_truemask_v4i32:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v10
  %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i32> %v
}
declare <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i32> @vpgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i32:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV64-LABEL: vpgather_v8i32:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8i32:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-LABEL: vpgather_baseidx_v8i8_v8i32:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i32:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i32:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v10, v9, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v10, v9, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}
define <8 x i32> @vpgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8i32:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-LABEL: vpgather_baseidx_v8i16_v8i32:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i32:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i32:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i32:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i32:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v8, v10, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i32:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-LABEL: vpgather_baseidx_v8i32:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}
declare <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i64> @vpgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i64:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV64-LABEL: vpgather_v2i64:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
  %v = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i64> @vpgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i64:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV64-LABEL: vpgather_v4i64:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
  %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vpgather_truemask_v4i64(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i64:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV64-LABEL: vpgather_truemask_v4i64:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
  %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i64> @vpgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i64:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV64-LABEL: vpgather_v8i64:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}
define <8 x i64> @vpgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_v8i8_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v12, v9, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v12, v9, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_v8i16_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}
define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v12, v10, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i32_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_v8i32_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}
define <8 x i64> @vpgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i64:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v8, 0
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: vpgather_baseidx_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

declare <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x bfloat> @vpgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2bf16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v2bf16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
  %v = call <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x bfloat> %v
}

declare <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x bfloat> @vpgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4bf16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v4bf16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x bfloat> %v
}

define <4 x bfloat> @vpgather_truemask_v4bf16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4bf16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_truemask_v4bf16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x bfloat> %v
}

declare <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x bfloat> @vpgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8bf16:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV64-LABEL: vpgather_v8bf16:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}
define <8 x bfloat> @vpgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8bf16:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_v8i8_v8bf16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8bf16:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v9, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8bf16:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v9, v8, v8
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8bf16:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_v8bf16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}
declare <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x half> @vpgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2f16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v2f16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
  %v = call <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x half> @vpgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4f16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_v4f16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vpgather_truemask_v4f16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4f16:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV64-LABEL: vpgather_truemask_v4f16:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
  %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x half> @vpgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8f16:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV64-LABEL: vpgather_v8f16:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}
define <8 x half> @vpgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8f16:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_v8i8_v8f16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f16:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v9, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f16:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v9, v8, v8
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8f16:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_v8f16:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}
declare <2 x float> @llvm.vp.gather.v2f32.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x float> @vpgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2f32:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV64-LABEL: vpgather_v2f32:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
  %v = call <2 x float> @llvm.vp.gather.v2f32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x float> @vpgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4f32:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV64-LABEL: vpgather_v4f32:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
  %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vpgather_truemask_v4f32(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4f32:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV64-LABEL: vpgather_truemask_v4f32:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v10
  %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x float> @vpgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8f32:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV64-LABEL: vpgather_v8f32:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}
define <8 x float> @vpgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8f32:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-LABEL: vpgather_baseidx_v8i8_v8f32:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f32:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f32:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f32:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v10, v9, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f32:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v10, v9, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}
1642 define <8 x float> @vpgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1643 ; RV32-LABEL: vpgather_baseidx_v8i16_v8f32:
1645 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1646 ; RV32-NEXT: vsext.vf2 v10, v8
1647 ; RV32-NEXT: vsll.vi v8, v10, 2
1648 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1649 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1652 ; RV64-LABEL: vpgather_baseidx_v8i16_v8f32:
1654 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1655 ; RV64-NEXT: vsext.vf4 v12, v8
1656 ; RV64-NEXT: vsll.vi v12, v12, 2
1657 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1658 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1660 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
1661 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1665 define <8 x float> @vpgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1666 ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f32:
1668 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1669 ; RV32-NEXT: vsext.vf2 v10, v8
1670 ; RV32-NEXT: vsll.vi v8, v10, 2
1671 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1672 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1675 ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f32:
1677 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1678 ; RV64-NEXT: vsext.vf4 v12, v8
1679 ; RV64-NEXT: vsll.vi v12, v12, 2
1680 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1681 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1683 %eidxs = sext <8 x i16> %idxs to <8 x i32>
1684 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1685 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1689 define <8 x float> @vpgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1690 ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f32:
1692 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1693 ; RV32-NEXT: vzext.vf2 v10, v8
1694 ; RV32-NEXT: vsll.vi v8, v10, 2
1695 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1696 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1699 ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f32:
1701 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1702 ; RV64-NEXT: vzext.vf2 v10, v8
1703 ; RV64-NEXT: vsll.vi v8, v10, 2
1704 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1705 ; RV64-NEXT: vluxei32.v v8, (a0), v8, v0.t
1707 %eidxs = zext <8 x i16> %idxs to <8 x i32>
1708 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1709 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
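; <8 x float> gather with native i32 indices: RV32 shifts the indices in place for vluxei32,
; while RV64 sign-extends them to i64 and uses vluxei64.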
1713 define <8 x float> @vpgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1714 ; RV32-LABEL: vpgather_baseidx_v8f32:
1716 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1717 ; RV32-NEXT: vsll.vi v8, v8, 2
1718 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1719 ; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t
1722 ; RV64-LABEL: vpgather_baseidx_v8f32:
1724 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1725 ; RV64-NEXT: vsext.vf2 v12, v8
1726 ; RV64-NEXT: vsll.vi v12, v12, 2
1727 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1728 ; RV64-NEXT: vluxei64.v v8, (a0), v12, v0.t
1730 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
1731 %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
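; Gathers of <2 x double>, <4 x double> and <8 x double> from a vector of pointers. On RV32 the
; 32-bit pointer vector is used with vluxei32 and the wider result is loaded into a temporary
; before being moved into v8; on RV64 vluxei64 can write its result straight over the pointer
; register group.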
1735 declare <2 x double> @llvm.vp.gather.v2f64.v2p0(<2 x ptr>, <2 x i1>, i32)
1737 define <2 x double> @vpgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1738 ; RV32-LABEL: vpgather_v2f64:
1740 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1741 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
1742 ; RV32-NEXT: vmv.v.v v8, v9
1745 ; RV64-LABEL: vpgather_v2f64:
1747 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1748 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
1750 %v = call <2 x double> @llvm.vp.gather.v2f64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1754 declare <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr>, <4 x i1>, i32)
1756 define <4 x double> @vpgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1757 ; RV32-LABEL: vpgather_v4f64:
1759 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1760 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
1761 ; RV32-NEXT: vmv.v.v v8, v10
1764 ; RV64-LABEL: vpgather_v4f64:
1766 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1767 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
1769 %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
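; An all-true mask is dropped, leaving an unmasked vluxei.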
1773 define <4 x double> @vpgather_truemask_v4f64(<4 x ptr> %ptrs, i32 zeroext %evl) {
1774 ; RV32-LABEL: vpgather_truemask_v4f64:
1776 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1777 ; RV32-NEXT: vluxei32.v v10, (zero), v8
1778 ; RV32-NEXT: vmv.v.v v8, v10
1781 ; RV64-LABEL: vpgather_truemask_v4f64:
1783 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1784 ; RV64-NEXT: vluxei64.v v8, (zero), v8
1786 %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
1790 declare <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr>, <8 x i1>, i32)
1792 define <8 x double> @vpgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1793 ; RV32-LABEL: vpgather_v8f64:
1795 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1796 ; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t
1797 ; RV32-NEXT: vmv.v.v v8, v12
1800 ; RV64-LABEL: vpgather_v8f64:
1802 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1803 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
1805 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
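; <8 x double> gathers addressed by i8 offsets. RV32 widens the indices to i32 (vluxei32) and
; RV64 to i64 (vluxei64), except that zero-extended i8 indices only need 16-bit indices, so both
; targets use vluxei16 there.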
1809 define <8 x double> @vpgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1810 ; RV32-LABEL: vpgather_baseidx_v8i8_v8f64:
1812 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1813 ; RV32-NEXT: vsext.vf4 v10, v8
1814 ; RV32-NEXT: vsll.vi v12, v10, 3
1815 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1816 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1819 ; RV64-LABEL: vpgather_baseidx_v8i8_v8f64:
1821 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1822 ; RV64-NEXT: vsext.vf8 v12, v8
1823 ; RV64-NEXT: vsll.vi v8, v12, 3
1824 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1825 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1827 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
1828 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1832 define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1833 ; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f64:
1835 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1836 ; RV32-NEXT: vsext.vf4 v10, v8
1837 ; RV32-NEXT: vsll.vi v12, v10, 3
1838 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1839 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1842 ; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f64:
1844 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1845 ; RV64-NEXT: vsext.vf8 v12, v8
1846 ; RV64-NEXT: vsll.vi v8, v12, 3
1847 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1848 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1850 %eidxs = sext <8 x i8> %idxs to <8 x i64>
1851 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1852 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1856 define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1857 ; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f64:
1859 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1860 ; RV32-NEXT: vzext.vf2 v9, v8
1861 ; RV32-NEXT: vsll.vi v12, v9, 3
1862 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1863 ; RV32-NEXT: vluxei16.v v8, (a0), v12, v0.t
1866 ; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f64:
1868 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1869 ; RV64-NEXT: vzext.vf2 v9, v8
1870 ; RV64-NEXT: vsll.vi v12, v9, 3
1871 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1872 ; RV64-NEXT: vluxei16.v v8, (a0), v12, v0.t
1874 %eidxs = zext <8 x i8> %idxs to <8 x i64>
1875 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1876 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
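; The same <8 x double> gathers with i16 offsets; zero-extended i16 indices again stay at 32 bits,
; so both targets use vluxei32 for the zext case.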
1880 define <8 x double> @vpgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1881 ; RV32-LABEL: vpgather_baseidx_v8i16_v8f64:
1883 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1884 ; RV32-NEXT: vsext.vf2 v10, v8
1885 ; RV32-NEXT: vsll.vi v12, v10, 3
1886 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1887 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1890 ; RV64-LABEL: vpgather_baseidx_v8i16_v8f64:
1892 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1893 ; RV64-NEXT: vsext.vf4 v12, v8
1894 ; RV64-NEXT: vsll.vi v8, v12, 3
1895 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1896 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1898 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
1899 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1903 define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1904 ; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f64:
1906 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1907 ; RV32-NEXT: vsext.vf2 v10, v8
1908 ; RV32-NEXT: vsll.vi v12, v10, 3
1909 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1910 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1913 ; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f64:
1915 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1916 ; RV64-NEXT: vsext.vf4 v12, v8
1917 ; RV64-NEXT: vsll.vi v8, v12, 3
1918 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1919 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1921 %eidxs = sext <8 x i16> %idxs to <8 x i64>
1922 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1923 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1927 define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1928 ; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f64:
1930 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1931 ; RV32-NEXT: vzext.vf2 v10, v8
1932 ; RV32-NEXT: vsll.vi v12, v10, 3
1933 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1934 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1937 ; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f64:
1939 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1940 ; RV64-NEXT: vzext.vf2 v10, v8
1941 ; RV64-NEXT: vsll.vi v12, v10, 3
1942 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1943 ; RV64-NEXT: vluxei32.v v8, (a0), v12, v0.t
1945 %eidxs = zext <8 x i16> %idxs to <8 x i64>
1946 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1947 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
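; <8 x double> gathers with i32 offsets: RV32 uses the indices directly with vluxei32, while RV64
; sign- or zero-extends them to i64 for vluxei64.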
1951 define <8 x double> @vpgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1952 ; RV32-LABEL: vpgather_baseidx_v8i32_v8f64:
1954 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1955 ; RV32-NEXT: vsll.vi v12, v8, 3
1956 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1957 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1960 ; RV64-LABEL: vpgather_baseidx_v8i32_v8f64:
1962 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1963 ; RV64-NEXT: vsext.vf2 v12, v8
1964 ; RV64-NEXT: vsll.vi v8, v12, 3
1965 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1966 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1968 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
1969 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1973 define <8 x double> @vpgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1974 ; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8f64:
1976 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1977 ; RV32-NEXT: vsll.vi v12, v8, 3
1978 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1979 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
1982 ; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8f64:
1984 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1985 ; RV64-NEXT: vsext.vf2 v12, v8
1986 ; RV64-NEXT: vsll.vi v8, v12, 3
1987 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
1988 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
1990 %eidxs = sext <8 x i32> %idxs to <8 x i64>
1991 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
1992 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1996 define <8 x double> @vpgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1997 ; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8f64:
1999 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2000 ; RV32-NEXT: vsll.vi v12, v8, 3
2001 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2002 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
2005 ; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8f64:
2007 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2008 ; RV64-NEXT: vzext.vf2 v12, v8
2009 ; RV64-NEXT: vsll.vi v8, v12, 3
2010 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2011 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2013 %eidxs = zext <8 x i32> %idxs to <8 x i64>
2014 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
2015 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
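; Native i64 indices: RV32 narrows them to 32 bits with vnsrl before the shift; RV64 shifts and
; uses them directly.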
2019 define <8 x double> @vpgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
2020 ; RV32-LABEL: vpgather_baseidx_v8f64:
2022 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2023 ; RV32-NEXT: vnsrl.wi v12, v8, 0
2024 ; RV32-NEXT: vsll.vi v12, v12, 3
2025 ; RV32-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2026 ; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t
2029 ; RV64-LABEL: vpgather_baseidx_v8f64:
2031 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2032 ; RV64-NEXT: vsll.vi v8, v8, 3
2033 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2034 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2036 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
2037 %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
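; <32 x double> gathers are emitted as two gathers of at most 16 elements: the first uses
; min(EVL, 16), the second uses max(EVL - 16, 0) (computed branchlessly with sltu/addi/and),
; with the mask register slid down by 2 bytes for the upper half.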
2041 declare <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr>, <32 x i1>, i32)
2043 define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
2044 ; RV32-LABEL: vpgather_v32f64:
2046 ; RV32-NEXT: li a2, 16
2047 ; RV32-NEXT: mv a1, a0
2048 ; RV32-NEXT: bltu a0, a2, .LBB94_2
2049 ; RV32-NEXT: # %bb.1:
2050 ; RV32-NEXT: li a1, 16
2051 ; RV32-NEXT: .LBB94_2:
2052 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2053 ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t
2054 ; RV32-NEXT: addi a1, a0, -16
2055 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2056 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2057 ; RV32-NEXT: sltu a0, a0, a1
2058 ; RV32-NEXT: addi a0, a0, -1
2059 ; RV32-NEXT: and a0, a0, a1
2060 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2061 ; RV32-NEXT: vslidedown.vi v8, v8, 16
2062 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2063 ; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
2064 ; RV32-NEXT: vmv8r.v v8, v24
2067 ; RV64-LABEL: vpgather_v32f64:
2069 ; RV64-NEXT: li a2, 16
2070 ; RV64-NEXT: mv a1, a0
2071 ; RV64-NEXT: bltu a0, a2, .LBB94_2
2072 ; RV64-NEXT: # %bb.1:
2073 ; RV64-NEXT: li a1, 16
2074 ; RV64-NEXT: .LBB94_2:
2075 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2076 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
2077 ; RV64-NEXT: addi a1, a0, -16
2078 ; RV64-NEXT: sltu a0, a0, a1
2079 ; RV64-NEXT: addi a0, a0, -1
2080 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2081 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2082 ; RV64-NEXT: and a0, a0, a1
2083 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
2084 ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t
2086 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2087 ret <32 x double> %v
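; Split <32 x double> gathers addressed by i8 offsets. RV32 extends all 32 indices at once
; (e32, m8) and slides the index vector down by 16 for the second gather; RV64 slides the i8
; indices down first and extends each 16-element half to i64 separately. Zero-extended i8
; indices only need 16-bit indices (vluxei16) on both targets.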
2090 define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2091 ; RV32-LABEL: vpgather_baseidx_v32i8_v32f64:
2093 ; RV32-NEXT: li a2, 32
2094 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2095 ; RV32-NEXT: vsext.vf4 v16, v8
2096 ; RV32-NEXT: li a3, 16
2097 ; RV32-NEXT: vsll.vi v16, v16, 3
2098 ; RV32-NEXT: mv a2, a1
2099 ; RV32-NEXT: bltu a1, a3, .LBB95_2
2100 ; RV32-NEXT: # %bb.1:
2101 ; RV32-NEXT: li a2, 16
2102 ; RV32-NEXT: .LBB95_2:
2103 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2104 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
2105 ; RV32-NEXT: addi a2, a1, -16
2106 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2107 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2108 ; RV32-NEXT: sltu a1, a1, a2
2109 ; RV32-NEXT: addi a1, a1, -1
2110 ; RV32-NEXT: and a1, a1, a2
2111 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2112 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2113 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2114 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
2117 ; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
2119 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
2120 ; RV64-NEXT: vslidedown.vi v10, v8, 16
2121 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2122 ; RV64-NEXT: vsext.vf8 v24, v8
2123 ; RV64-NEXT: li a3, 16
2124 ; RV64-NEXT: vsext.vf8 v16, v10
2125 ; RV64-NEXT: vsll.vi v16, v16, 3
2126 ; RV64-NEXT: vsll.vi v8, v24, 3
2127 ; RV64-NEXT: mv a2, a1
2128 ; RV64-NEXT: bltu a1, a3, .LBB95_2
2129 ; RV64-NEXT: # %bb.1:
2130 ; RV64-NEXT: li a2, 16
2131 ; RV64-NEXT: .LBB95_2:
2132 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2133 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2134 ; RV64-NEXT: addi a2, a1, -16
2135 ; RV64-NEXT: sltu a1, a1, a2
2136 ; RV64-NEXT: addi a1, a1, -1
2137 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2138 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2139 ; RV64-NEXT: and a1, a1, a2
2140 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2141 ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
2143 %ptrs = getelementptr inbounds double, ptr %base, <32 x i8> %idxs
2144 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2145 ret <32 x double> %v
2148 define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2149 ; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
2151 ; RV32-NEXT: li a2, 32
2152 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2153 ; RV32-NEXT: vsext.vf4 v16, v8
2154 ; RV32-NEXT: li a3, 16
2155 ; RV32-NEXT: vsll.vi v16, v16, 3
2156 ; RV32-NEXT: mv a2, a1
2157 ; RV32-NEXT: bltu a1, a3, .LBB96_2
2158 ; RV32-NEXT: # %bb.1:
2159 ; RV32-NEXT: li a2, 16
2160 ; RV32-NEXT: .LBB96_2:
2161 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2162 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
2163 ; RV32-NEXT: addi a2, a1, -16
2164 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2165 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2166 ; RV32-NEXT: sltu a1, a1, a2
2167 ; RV32-NEXT: addi a1, a1, -1
2168 ; RV32-NEXT: and a1, a1, a2
2169 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2170 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2171 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2172 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
2175 ; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
2177 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2178 ; RV64-NEXT: vsext.vf8 v24, v8
2179 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
2180 ; RV64-NEXT: vslidedown.vi v8, v8, 16
2181 ; RV64-NEXT: li a3, 16
2182 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2183 ; RV64-NEXT: vsext.vf8 v16, v8
2184 ; RV64-NEXT: vsll.vi v16, v16, 3
2185 ; RV64-NEXT: vsll.vi v8, v24, 3
2186 ; RV64-NEXT: mv a2, a1
2187 ; RV64-NEXT: bltu a1, a3, .LBB96_2
2188 ; RV64-NEXT: # %bb.1:
2189 ; RV64-NEXT: li a2, 16
2190 ; RV64-NEXT: .LBB96_2:
2191 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2192 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2193 ; RV64-NEXT: addi a2, a1, -16
2194 ; RV64-NEXT: sltu a1, a1, a2
2195 ; RV64-NEXT: addi a1, a1, -1
2196 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2197 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2198 ; RV64-NEXT: and a1, a1, a2
2199 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2200 ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
2202 %eidxs = sext <32 x i8> %idxs to <32 x i64>
2203 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2204 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2205 ret <32 x double> %v
2208 define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2209 ; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
2211 ; RV32-NEXT: li a2, 32
2212 ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
2213 ; RV32-NEXT: vzext.vf2 v12, v8
2214 ; RV32-NEXT: li a3, 16
2215 ; RV32-NEXT: vsll.vi v16, v12, 3
2216 ; RV32-NEXT: mv a2, a1
2217 ; RV32-NEXT: bltu a1, a3, .LBB97_2
2218 ; RV32-NEXT: # %bb.1:
2219 ; RV32-NEXT: li a2, 16
2220 ; RV32-NEXT: .LBB97_2:
2221 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2222 ; RV32-NEXT: vluxei16.v v8, (a0), v16, v0.t
2223 ; RV32-NEXT: addi a2, a1, -16
2224 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2225 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2226 ; RV32-NEXT: sltu a1, a1, a2
2227 ; RV32-NEXT: addi a1, a1, -1
2228 ; RV32-NEXT: and a1, a1, a2
2229 ; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, ma
2230 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2231 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2232 ; RV32-NEXT: vluxei16.v v16, (a0), v24, v0.t
2235 ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
2237 ; RV64-NEXT: li a2, 32
2238 ; RV64-NEXT: vsetvli zero, a2, e16, m4, ta, ma
2239 ; RV64-NEXT: vzext.vf2 v12, v8
2240 ; RV64-NEXT: li a3, 16
2241 ; RV64-NEXT: vsll.vi v16, v12, 3
2242 ; RV64-NEXT: mv a2, a1
2243 ; RV64-NEXT: bltu a1, a3, .LBB97_2
2244 ; RV64-NEXT: # %bb.1:
2245 ; RV64-NEXT: li a2, 16
2246 ; RV64-NEXT: .LBB97_2:
2247 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2248 ; RV64-NEXT: vluxei16.v v8, (a0), v16, v0.t
2249 ; RV64-NEXT: addi a2, a1, -16
2250 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2251 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2252 ; RV64-NEXT: sltu a1, a1, a2
2253 ; RV64-NEXT: addi a1, a1, -1
2254 ; RV64-NEXT: and a1, a1, a2
2255 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma
2256 ; RV64-NEXT: vslidedown.vi v24, v16, 16
2257 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2258 ; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t
2260 %eidxs = zext <32 x i8> %idxs to <32 x i64>
2261 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2262 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2263 ret <32 x double> %v
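; The same split gathers with i16 offsets; the zext case stays at 32-bit indices (vluxei32) on
; both targets.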
2266 define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2267 ; RV32-LABEL: vpgather_baseidx_v32i16_v32f64:
2269 ; RV32-NEXT: li a2, 32
2270 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2271 ; RV32-NEXT: vsext.vf2 v16, v8
2272 ; RV32-NEXT: li a3, 16
2273 ; RV32-NEXT: vsll.vi v16, v16, 3
2274 ; RV32-NEXT: mv a2, a1
2275 ; RV32-NEXT: bltu a1, a3, .LBB98_2
2276 ; RV32-NEXT: # %bb.1:
2277 ; RV32-NEXT: li a2, 16
2278 ; RV32-NEXT: .LBB98_2:
2279 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2280 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
2281 ; RV32-NEXT: addi a2, a1, -16
2282 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2283 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2284 ; RV32-NEXT: sltu a1, a1, a2
2285 ; RV32-NEXT: addi a1, a1, -1
2286 ; RV32-NEXT: and a1, a1, a2
2287 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2288 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2289 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2290 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
2293 ; RV64-LABEL: vpgather_baseidx_v32i16_v32f64:
2295 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma
2296 ; RV64-NEXT: vslidedown.vi v12, v8, 16
2297 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2298 ; RV64-NEXT: vsext.vf4 v24, v8
2299 ; RV64-NEXT: li a3, 16
2300 ; RV64-NEXT: vsext.vf4 v16, v12
2301 ; RV64-NEXT: vsll.vi v16, v16, 3
2302 ; RV64-NEXT: vsll.vi v8, v24, 3
2303 ; RV64-NEXT: mv a2, a1
2304 ; RV64-NEXT: bltu a1, a3, .LBB98_2
2305 ; RV64-NEXT: # %bb.1:
2306 ; RV64-NEXT: li a2, 16
2307 ; RV64-NEXT: .LBB98_2:
2308 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2309 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2310 ; RV64-NEXT: addi a2, a1, -16
2311 ; RV64-NEXT: sltu a1, a1, a2
2312 ; RV64-NEXT: addi a1, a1, -1
2313 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2314 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2315 ; RV64-NEXT: and a1, a1, a2
2316 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2317 ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
2319 %ptrs = getelementptr inbounds double, ptr %base, <32 x i16> %idxs
2320 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2321 ret <32 x double> %v
2324 define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2325 ; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
2327 ; RV32-NEXT: li a2, 32
2328 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2329 ; RV32-NEXT: vsext.vf2 v16, v8
2330 ; RV32-NEXT: li a3, 16
2331 ; RV32-NEXT: vsll.vi v16, v16, 3
2332 ; RV32-NEXT: mv a2, a1
2333 ; RV32-NEXT: bltu a1, a3, .LBB99_2
2334 ; RV32-NEXT: # %bb.1:
2335 ; RV32-NEXT: li a2, 16
2336 ; RV32-NEXT: .LBB99_2:
2337 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2338 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
2339 ; RV32-NEXT: addi a2, a1, -16
2340 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2341 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2342 ; RV32-NEXT: sltu a1, a1, a2
2343 ; RV32-NEXT: addi a1, a1, -1
2344 ; RV32-NEXT: and a1, a1, a2
2345 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2346 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2347 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2348 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
2351 ; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
2353 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2354 ; RV64-NEXT: vsext.vf4 v24, v8
2355 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma
2356 ; RV64-NEXT: vslidedown.vi v8, v8, 16
2357 ; RV64-NEXT: li a3, 16
2358 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2359 ; RV64-NEXT: vsext.vf4 v16, v8
2360 ; RV64-NEXT: vsll.vi v16, v16, 3
2361 ; RV64-NEXT: vsll.vi v8, v24, 3
2362 ; RV64-NEXT: mv a2, a1
2363 ; RV64-NEXT: bltu a1, a3, .LBB99_2
2364 ; RV64-NEXT: # %bb.1:
2365 ; RV64-NEXT: li a2, 16
2366 ; RV64-NEXT: .LBB99_2:
2367 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2368 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2369 ; RV64-NEXT: addi a2, a1, -16
2370 ; RV64-NEXT: sltu a1, a1, a2
2371 ; RV64-NEXT: addi a1, a1, -1
2372 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2373 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2374 ; RV64-NEXT: and a1, a1, a2
2375 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2376 ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
2378 %eidxs = sext <32 x i16> %idxs to <32 x i64>
2379 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2380 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2381 ret <32 x double> %v
2384 define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2385 ; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
2387 ; RV32-NEXT: li a2, 32
2388 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2389 ; RV32-NEXT: vzext.vf2 v16, v8
2390 ; RV32-NEXT: li a3, 16
2391 ; RV32-NEXT: vsll.vi v16, v16, 3
2392 ; RV32-NEXT: mv a2, a1
2393 ; RV32-NEXT: bltu a1, a3, .LBB100_2
2394 ; RV32-NEXT: # %bb.1:
2395 ; RV32-NEXT: li a2, 16
2396 ; RV32-NEXT: .LBB100_2:
2397 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2398 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
2399 ; RV32-NEXT: addi a2, a1, -16
2400 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2401 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2402 ; RV32-NEXT: sltu a1, a1, a2
2403 ; RV32-NEXT: addi a1, a1, -1
2404 ; RV32-NEXT: and a1, a1, a2
2405 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2406 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2407 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2408 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
2411 ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
2413 ; RV64-NEXT: li a2, 32
2414 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2415 ; RV64-NEXT: vzext.vf2 v16, v8
2416 ; RV64-NEXT: li a3, 16
2417 ; RV64-NEXT: vsll.vi v16, v16, 3
2418 ; RV64-NEXT: mv a2, a1
2419 ; RV64-NEXT: bltu a1, a3, .LBB100_2
2420 ; RV64-NEXT: # %bb.1:
2421 ; RV64-NEXT: li a2, 16
2422 ; RV64-NEXT: .LBB100_2:
2423 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2424 ; RV64-NEXT: vluxei32.v v8, (a0), v16, v0.t
2425 ; RV64-NEXT: addi a2, a1, -16
2426 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2427 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2428 ; RV64-NEXT: sltu a1, a1, a2
2429 ; RV64-NEXT: addi a1, a1, -1
2430 ; RV64-NEXT: and a1, a1, a2
2431 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2432 ; RV64-NEXT: vslidedown.vi v24, v16, 16
2433 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2434 ; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t
2436 %eidxs = zext <32 x i16> %idxs to <32 x i64>
2437 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2438 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2439 ret <32 x double> %v
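; Split gathers with i32 offsets: RV32 shifts all 32 indices in place for vluxei32, RV64 extends
; each 16-element half to i64 for vluxei64.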
2442 define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2443 ; RV32-LABEL: vpgather_baseidx_v32i32_v32f64:
2445 ; RV32-NEXT: li a2, 32
2446 ; RV32-NEXT: li a3, 16
2447 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2448 ; RV32-NEXT: vsll.vi v16, v8, 3
2449 ; RV32-NEXT: mv a2, a1
2450 ; RV32-NEXT: bltu a1, a3, .LBB101_2
2451 ; RV32-NEXT: # %bb.1:
2452 ; RV32-NEXT: li a2, 16
2453 ; RV32-NEXT: .LBB101_2:
2454 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2455 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
2456 ; RV32-NEXT: addi a2, a1, -16
2457 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2458 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2459 ; RV32-NEXT: sltu a1, a1, a2
2460 ; RV32-NEXT: addi a1, a1, -1
2461 ; RV32-NEXT: and a1, a1, a2
2462 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2463 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2464 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2465 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
2468 ; RV64-LABEL: vpgather_baseidx_v32i32_v32f64:
2470 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2471 ; RV64-NEXT: vslidedown.vi v16, v8, 16
2472 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2473 ; RV64-NEXT: vsext.vf2 v24, v8
2474 ; RV64-NEXT: li a3, 16
2475 ; RV64-NEXT: vsext.vf2 v8, v16
2476 ; RV64-NEXT: vsll.vi v16, v8, 3
2477 ; RV64-NEXT: vsll.vi v8, v24, 3
2478 ; RV64-NEXT: mv a2, a1
2479 ; RV64-NEXT: bltu a1, a3, .LBB101_2
2480 ; RV64-NEXT: # %bb.1:
2481 ; RV64-NEXT: li a2, 16
2482 ; RV64-NEXT: .LBB101_2:
2483 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2484 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2485 ; RV64-NEXT: addi a2, a1, -16
2486 ; RV64-NEXT: sltu a1, a1, a2
2487 ; RV64-NEXT: addi a1, a1, -1
2488 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2489 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2490 ; RV64-NEXT: and a1, a1, a2
2491 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2492 ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
2494 %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs
2495 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2496 ret <32 x double> %v
2499 define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2500 ; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
2502 ; RV32-NEXT: li a2, 32
2503 ; RV32-NEXT: li a3, 16
2504 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2505 ; RV32-NEXT: vsll.vi v16, v8, 3
2506 ; RV32-NEXT: mv a2, a1
2507 ; RV32-NEXT: bltu a1, a3, .LBB102_2
2508 ; RV32-NEXT: # %bb.1:
2509 ; RV32-NEXT: li a2, 16
2510 ; RV32-NEXT: .LBB102_2:
2511 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2512 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
2513 ; RV32-NEXT: addi a2, a1, -16
2514 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2515 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2516 ; RV32-NEXT: sltu a1, a1, a2
2517 ; RV32-NEXT: addi a1, a1, -1
2518 ; RV32-NEXT: and a1, a1, a2
2519 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2520 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2521 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2522 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
2525 ; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
2527 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2528 ; RV64-NEXT: vsext.vf2 v24, v8
2529 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2530 ; RV64-NEXT: vslidedown.vi v8, v8, 16
2531 ; RV64-NEXT: li a3, 16
2532 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2533 ; RV64-NEXT: vsext.vf2 v16, v8
2534 ; RV64-NEXT: vsll.vi v16, v16, 3
2535 ; RV64-NEXT: vsll.vi v8, v24, 3
2536 ; RV64-NEXT: mv a2, a1
2537 ; RV64-NEXT: bltu a1, a3, .LBB102_2
2538 ; RV64-NEXT: # %bb.1:
2539 ; RV64-NEXT: li a2, 16
2540 ; RV64-NEXT: .LBB102_2:
2541 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2542 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2543 ; RV64-NEXT: addi a2, a1, -16
2544 ; RV64-NEXT: sltu a1, a1, a2
2545 ; RV64-NEXT: addi a1, a1, -1
2546 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2547 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2548 ; RV64-NEXT: and a1, a1, a2
2549 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2550 ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
2552 %eidxs = sext <32 x i32> %idxs to <32 x i64>
2553 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2554 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2555 ret <32 x double> %v
2558 define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2559 ; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
2561 ; RV32-NEXT: li a2, 32
2562 ; RV32-NEXT: li a3, 16
2563 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2564 ; RV32-NEXT: vsll.vi v16, v8, 3
2565 ; RV32-NEXT: mv a2, a1
2566 ; RV32-NEXT: bltu a1, a3, .LBB103_2
2567 ; RV32-NEXT: # %bb.1:
2568 ; RV32-NEXT: li a2, 16
2569 ; RV32-NEXT: .LBB103_2:
2570 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2571 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
2572 ; RV32-NEXT: addi a2, a1, -16
2573 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2574 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2575 ; RV32-NEXT: sltu a1, a1, a2
2576 ; RV32-NEXT: addi a1, a1, -1
2577 ; RV32-NEXT: and a1, a1, a2
2578 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2579 ; RV32-NEXT: vslidedown.vi v24, v16, 16
2580 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2581 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
2584 ; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
2586 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2587 ; RV64-NEXT: vzext.vf2 v24, v8
2588 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2589 ; RV64-NEXT: vslidedown.vi v8, v8, 16
2590 ; RV64-NEXT: li a3, 16
2591 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2592 ; RV64-NEXT: vzext.vf2 v16, v8
2593 ; RV64-NEXT: vsll.vi v16, v16, 3
2594 ; RV64-NEXT: vsll.vi v8, v24, 3
2595 ; RV64-NEXT: mv a2, a1
2596 ; RV64-NEXT: bltu a1, a3, .LBB103_2
2597 ; RV64-NEXT: # %bb.1:
2598 ; RV64-NEXT: li a2, 16
2599 ; RV64-NEXT: .LBB103_2:
2600 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2601 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2602 ; RV64-NEXT: addi a2, a1, -16
2603 ; RV64-NEXT: sltu a1, a1, a2
2604 ; RV64-NEXT: addi a1, a1, -1
2605 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2606 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2607 ; RV64-NEXT: and a1, a1, a2
2608 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2609 ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
2611 %eidxs = zext <32 x i32> %idxs to <32 x i64>
2612 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2613 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2614 ret <32 x double> %v
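; Native i64 indices for the split gather: RV32 narrows both halves with vnsrl, recombines them
; with vslideup and shifts, while RV64 simply shifts each half.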
2617 define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2618 ; RV32-LABEL: vpgather_baseidx_v32f64:
2620 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
2621 ; RV32-NEXT: vmv1r.v v7, v0
2622 ; RV32-NEXT: vnsrl.wi v24, v16, 0
2623 ; RV32-NEXT: vnsrl.wi v16, v8, 0
2624 ; RV32-NEXT: li a2, 32
2625 ; RV32-NEXT: addi a3, a1, -16
2626 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2627 ; RV32-NEXT: vslidedown.vi v0, v0, 2
2628 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
2629 ; RV32-NEXT: vslideup.vi v16, v24, 16
2630 ; RV32-NEXT: vsll.vi v24, v16, 3
2631 ; RV32-NEXT: sltu a2, a1, a3
2632 ; RV32-NEXT: addi a2, a2, -1
2633 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
2634 ; RV32-NEXT: vslidedown.vi v8, v24, 16
2635 ; RV32-NEXT: and a2, a2, a3
2636 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2637 ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
2638 ; RV32-NEXT: li a2, 16
2639 ; RV32-NEXT: bltu a1, a2, .LBB104_2
2640 ; RV32-NEXT: # %bb.1:
2641 ; RV32-NEXT: li a1, 16
2642 ; RV32-NEXT: .LBB104_2:
2643 ; RV32-NEXT: vmv1r.v v0, v7
2644 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2645 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
2648 ; RV64-LABEL: vpgather_baseidx_v32f64:
2650 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
2651 ; RV64-NEXT: vsll.vi v16, v16, 3
2652 ; RV64-NEXT: li a3, 16
2653 ; RV64-NEXT: vsll.vi v8, v8, 3
2654 ; RV64-NEXT: mv a2, a1
2655 ; RV64-NEXT: bltu a1, a3, .LBB104_2
2656 ; RV64-NEXT: # %bb.1:
2657 ; RV64-NEXT: li a2, 16
2658 ; RV64-NEXT: .LBB104_2:
2659 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
2660 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
2661 ; RV64-NEXT: addi a2, a1, -16
2662 ; RV64-NEXT: sltu a1, a1, a2
2663 ; RV64-NEXT: addi a1, a1, -1
2664 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2665 ; RV64-NEXT: vslidedown.vi v0, v0, 2
2666 ; RV64-NEXT: and a1, a1, a2
2667 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
2668 ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
2670 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %idxs
2671 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2672 ret <32 x double> %v