1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH
4 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH
6 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN
8 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN
11 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
12 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH
13 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
14 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH
15 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
16 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN
17 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
18 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN
20 declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)
22 define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru) {
23 ; RV32V-LABEL: mgather_v1i8:
25 ; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
26 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
27 ; RV32V-NEXT: vmv1r.v v8, v9
30 ; RV64V-LABEL: mgather_v1i8:
32 ; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
33 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
34 ; RV64V-NEXT: vmv1r.v v8, v9
37 ; RV32ZVE32F-LABEL: mgather_v1i8:
38 ; RV32ZVE32F: # %bb.0:
39 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
40 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
41 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
42 ; RV32ZVE32F-NEXT: ret
44 ; RV64ZVE32F-LABEL: mgather_v1i8:
45 ; RV64ZVE32F: # %bb.0:
46 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
47 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
48 ; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
49 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
50 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
51 ; RV64ZVE32F-NEXT: vle8.v v8, (a0)
52 ; RV64ZVE32F-NEXT: .LBB0_2: # %else
53 ; RV64ZVE32F-NEXT: ret
54 %v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
58 declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
60 define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
61 ; RV32V-LABEL: mgather_v2i8:
63 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
64 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
65 ; RV32V-NEXT: vmv1r.v v8, v9
68 ; RV64V-LABEL: mgather_v2i8:
70 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
71 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
72 ; RV64V-NEXT: vmv1r.v v8, v9
75 ; RV32ZVE32F-LABEL: mgather_v2i8:
76 ; RV32ZVE32F: # %bb.0:
77 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
78 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
79 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
80 ; RV32ZVE32F-NEXT: ret
82 ; RV64ZVE32F-LABEL: mgather_v2i8:
83 ; RV64ZVE32F: # %bb.0:
84 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
85 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
86 ; RV64ZVE32F-NEXT: andi a3, a2, 1
87 ; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
88 ; RV64ZVE32F-NEXT: # %bb.1: # %else
89 ; RV64ZVE32F-NEXT: andi a2, a2, 2
90 ; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
91 ; RV64ZVE32F-NEXT: .LBB1_2: # %else2
92 ; RV64ZVE32F-NEXT: ret
93 ; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load
94 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
95 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
96 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
97 ; RV64ZVE32F-NEXT: andi a2, a2, 2
98 ; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
99 ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1
100 ; RV64ZVE32F-NEXT: lbu a0, 0(a1)
101 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
102 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
103 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
104 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
105 ; RV64ZVE32F-NEXT: ret
106 %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
110 define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
111 ; RV32V-LABEL: mgather_v2i8_sextload_v2i16:
113 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
114 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
115 ; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
116 ; RV32V-NEXT: vsext.vf2 v8, v9
119 ; RV64V-LABEL: mgather_v2i8_sextload_v2i16:
121 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
122 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
123 ; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
124 ; RV64V-NEXT: vsext.vf2 v8, v9
127 ; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
128 ; RV32ZVE32F: # %bb.0:
129 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
130 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
131 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
132 ; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
133 ; RV32ZVE32F-NEXT: ret
135 ; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
136 ; RV64ZVE32F: # %bb.0:
137 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
138 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
139 ; RV64ZVE32F-NEXT: andi a3, a2, 1
140 ; RV64ZVE32F-NEXT: beqz a3, .LBB2_2
141 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
142 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
143 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
144 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
145 ; RV64ZVE32F-NEXT: .LBB2_2: # %else
146 ; RV64ZVE32F-NEXT: andi a2, a2, 2
147 ; RV64ZVE32F-NEXT: beqz a2, .LBB2_4
148 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
149 ; RV64ZVE32F-NEXT: lbu a0, 0(a1)
150 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
151 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
152 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
153 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
154 ; RV64ZVE32F-NEXT: .LBB2_4: # %else2
155 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
156 ; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
157 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
158 ; RV64ZVE32F-NEXT: ret
159 %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
160 %ev = sext <2 x i8> %v to <2 x i16>
164 define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
165 ; RV32V-LABEL: mgather_v2i8_zextload_v2i16:
167 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
168 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
169 ; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
170 ; RV32V-NEXT: vzext.vf2 v8, v9
173 ; RV64V-LABEL: mgather_v2i8_zextload_v2i16:
175 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
176 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
177 ; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
178 ; RV64V-NEXT: vzext.vf2 v8, v9
181 ; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
182 ; RV32ZVE32F: # %bb.0:
183 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
184 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
185 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
186 ; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
187 ; RV32ZVE32F-NEXT: ret
189 ; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
190 ; RV64ZVE32F: # %bb.0:
191 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
192 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
193 ; RV64ZVE32F-NEXT: andi a3, a2, 1
194 ; RV64ZVE32F-NEXT: beqz a3, .LBB3_2
195 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
196 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
197 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
198 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
199 ; RV64ZVE32F-NEXT: .LBB3_2: # %else
200 ; RV64ZVE32F-NEXT: andi a2, a2, 2
201 ; RV64ZVE32F-NEXT: beqz a2, .LBB3_4
202 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
203 ; RV64ZVE32F-NEXT: lbu a0, 0(a1)
204 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
205 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
206 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
207 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
208 ; RV64ZVE32F-NEXT: .LBB3_4: # %else2
209 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
210 ; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
211 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
212 ; RV64ZVE32F-NEXT: ret
213 %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
214 %ev = zext <2 x i8> %v to <2 x i16>
218 define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
219 ; RV32V-LABEL: mgather_v2i8_sextload_v2i32:
221 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
222 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
223 ; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
224 ; RV32V-NEXT: vsext.vf4 v8, v9
227 ; RV64V-LABEL: mgather_v2i8_sextload_v2i32:
229 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
230 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
231 ; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
232 ; RV64V-NEXT: vsext.vf4 v8, v9
235 ; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
236 ; RV32ZVE32F: # %bb.0:
237 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
238 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
239 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
240 ; RV32ZVE32F-NEXT: vsext.vf4 v8, v9
241 ; RV32ZVE32F-NEXT: ret
243 ; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
244 ; RV64ZVE32F: # %bb.0:
245 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
246 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
247 ; RV64ZVE32F-NEXT: andi a3, a2, 1
248 ; RV64ZVE32F-NEXT: beqz a3, .LBB4_2
249 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
250 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
251 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
252 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
253 ; RV64ZVE32F-NEXT: .LBB4_2: # %else
254 ; RV64ZVE32F-NEXT: andi a2, a2, 2
255 ; RV64ZVE32F-NEXT: beqz a2, .LBB4_4
256 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
257 ; RV64ZVE32F-NEXT: lbu a0, 0(a1)
258 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
259 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
260 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
261 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
262 ; RV64ZVE32F-NEXT: .LBB4_4: # %else2
263 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
264 ; RV64ZVE32F-NEXT: vsext.vf4 v9, v8
265 ; RV64ZVE32F-NEXT: vmv.v.v v8, v9
266 ; RV64ZVE32F-NEXT: ret
267 %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
268 %ev = sext <2 x i8> %v to <2 x i32>
272 define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
273 ; RV32V-LABEL: mgather_v2i8_zextload_v2i32:
275 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
276 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
277 ; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
278 ; RV32V-NEXT: vzext.vf4 v8, v9
281 ; RV64V-LABEL: mgather_v2i8_zextload_v2i32:
283 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
284 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
285 ; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
286 ; RV64V-NEXT: vzext.vf4 v8, v9
289 ; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
290 ; RV32ZVE32F: # %bb.0:
291 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
292 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
293 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
294 ; RV32ZVE32F-NEXT: vzext.vf4 v8, v9
295 ; RV32ZVE32F-NEXT: ret
297 ; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
298 ; RV64ZVE32F: # %bb.0:
299 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
300 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
301 ; RV64ZVE32F-NEXT: andi a3, a2, 1
302 ; RV64ZVE32F-NEXT: beqz a3, .LBB5_2
303 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
304 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
305 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
306 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
307 ; RV64ZVE32F-NEXT: .LBB5_2: # %else
308 ; RV64ZVE32F-NEXT: andi a2, a2, 2
309 ; RV64ZVE32F-NEXT: beqz a2, .LBB5_4
310 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
311 ; RV64ZVE32F-NEXT: lbu a0, 0(a1)
312 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
313 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
314 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
315 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
316 ; RV64ZVE32F-NEXT: .LBB5_4: # %else2
317 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
318 ; RV64ZVE32F-NEXT: vzext.vf4 v9, v8
319 ; RV64ZVE32F-NEXT: vmv.v.v v8, v9
320 ; RV64ZVE32F-NEXT: ret
321 %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
322 %ev = zext <2 x i8> %v to <2 x i32>
326 define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
327 ; RV32V-LABEL: mgather_v2i8_sextload_v2i64:
329 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
330 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
331 ; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
332 ; RV32V-NEXT: vsext.vf8 v8, v9
335 ; RV64V-LABEL: mgather_v2i8_sextload_v2i64:
337 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
338 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
339 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
340 ; RV64V-NEXT: vsext.vf8 v8, v9
343 ; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
344 ; RV32ZVE32F: # %bb.0:
345 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
346 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
347 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
348 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
349 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
350 ; RV32ZVE32F-NEXT: srai a3, a1, 31
351 ; RV32ZVE32F-NEXT: srai a4, a2, 31
352 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
353 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
354 ; RV32ZVE32F-NEXT: sw a2, 8(a0)
355 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
356 ; RV32ZVE32F-NEXT: ret
358 ; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
359 ; RV64ZVE32F: # %bb.0:
360 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
361 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
362 ; RV64ZVE32F-NEXT: andi a3, a2, 1
363 ; RV64ZVE32F-NEXT: beqz a3, .LBB6_2
364 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
365 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
366 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
367 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
368 ; RV64ZVE32F-NEXT: .LBB6_2: # %else
369 ; RV64ZVE32F-NEXT: andi a2, a2, 2
370 ; RV64ZVE32F-NEXT: beqz a2, .LBB6_4
371 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
372 ; RV64ZVE32F-NEXT: lbu a0, 0(a1)
373 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
374 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
375 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
376 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
377 ; RV64ZVE32F-NEXT: .LBB6_4: # %else2
378 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
379 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
380 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
381 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
382 ; RV64ZVE32F-NEXT: ret
383 %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
384 %ev = sext <2 x i8> %v to <2 x i64>
388 define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
389 ; RV32V-LABEL: mgather_v2i8_zextload_v2i64:
391 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
392 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
393 ; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
394 ; RV32V-NEXT: vzext.vf8 v8, v9
397 ; RV64V-LABEL: mgather_v2i8_zextload_v2i64:
399 ; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
400 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
401 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
402 ; RV64V-NEXT: vzext.vf8 v8, v9
405 ; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
406 ; RV32ZVE32F: # %bb.0:
407 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
408 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
409 ; RV32ZVE32F-NEXT: sw zero, 12(a0)
410 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
411 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
412 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
413 ; RV32ZVE32F-NEXT: andi a1, a1, 255
414 ; RV32ZVE32F-NEXT: andi a2, a2, 255
415 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
416 ; RV32ZVE32F-NEXT: sw zero, 4(a0)
417 ; RV32ZVE32F-NEXT: sw a2, 8(a0)
418 ; RV32ZVE32F-NEXT: ret
420 ; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
421 ; RV64ZVE32F: # %bb.0:
422 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
423 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
424 ; RV64ZVE32F-NEXT: andi a3, a2, 1
425 ; RV64ZVE32F-NEXT: beqz a3, .LBB7_2
426 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
427 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
428 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
429 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
430 ; RV64ZVE32F-NEXT: .LBB7_2: # %else
431 ; RV64ZVE32F-NEXT: andi a2, a2, 2
432 ; RV64ZVE32F-NEXT: beqz a2, .LBB7_4
433 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
434 ; RV64ZVE32F-NEXT: lbu a0, 0(a1)
435 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
436 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
437 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
438 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
439 ; RV64ZVE32F-NEXT: .LBB7_4: # %else2
440 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
441 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
442 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
443 ; RV64ZVE32F-NEXT: andi a0, a0, 255
444 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
445 ; RV64ZVE32F-NEXT: andi a1, a1, 255
446 ; RV64ZVE32F-NEXT: ret
447 %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
448 %ev = zext <2 x i8> %v to <2 x i64>
452 declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
454 define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) {
455 ; RV32-LABEL: mgather_v4i8:
457 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
458 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
459 ; RV32-NEXT: vmv1r.v v8, v9
462 ; RV64V-LABEL: mgather_v4i8:
464 ; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
465 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
466 ; RV64V-NEXT: vmv1r.v v8, v10
469 ; RV64ZVE32F-LABEL: mgather_v4i8:
470 ; RV64ZVE32F: # %bb.0:
471 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
472 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
473 ; RV64ZVE32F-NEXT: andi a2, a1, 1
474 ; RV64ZVE32F-NEXT: bnez a2, .LBB8_5
475 ; RV64ZVE32F-NEXT: # %bb.1: # %else
476 ; RV64ZVE32F-NEXT: andi a2, a1, 2
477 ; RV64ZVE32F-NEXT: bnez a2, .LBB8_6
478 ; RV64ZVE32F-NEXT: .LBB8_2: # %else2
479 ; RV64ZVE32F-NEXT: andi a2, a1, 4
480 ; RV64ZVE32F-NEXT: bnez a2, .LBB8_7
481 ; RV64ZVE32F-NEXT: .LBB8_3: # %else5
482 ; RV64ZVE32F-NEXT: andi a1, a1, 8
483 ; RV64ZVE32F-NEXT: bnez a1, .LBB8_8
484 ; RV64ZVE32F-NEXT: .LBB8_4: # %else8
485 ; RV64ZVE32F-NEXT: ret
486 ; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load
487 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
488 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
489 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
490 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
491 ; RV64ZVE32F-NEXT: andi a2, a1, 2
492 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_2
493 ; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1
494 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
495 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
496 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
497 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
498 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
499 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
500 ; RV64ZVE32F-NEXT: andi a2, a1, 4
501 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_3
502 ; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4
503 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
504 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
505 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
506 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
507 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
508 ; RV64ZVE32F-NEXT: andi a1, a1, 8
509 ; RV64ZVE32F-NEXT: beqz a1, .LBB8_4
510 ; RV64ZVE32F-NEXT: .LBB8_8: # %cond.load7
511 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
512 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
513 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
514 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
515 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
516 ; RV64ZVE32F-NEXT: ret
517 %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %m, <4 x i8> %passthru)
521 define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
522 ; RV32-LABEL: mgather_truemask_v4i8:
524 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
525 ; RV32-NEXT: vluxei32.v v9, (zero), v8
526 ; RV32-NEXT: vmv1r.v v8, v9
529 ; RV64V-LABEL: mgather_truemask_v4i8:
531 ; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
532 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
533 ; RV64V-NEXT: vmv1r.v v8, v10
536 ; RV64ZVE32F-LABEL: mgather_truemask_v4i8:
537 ; RV64ZVE32F: # %bb.0:
538 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
539 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
540 ; RV64ZVE32F-NEXT: ld a3, 16(a0)
541 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
542 ; RV64ZVE32F-NEXT: lbu a1, 0(a1)
543 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
544 ; RV64ZVE32F-NEXT: lbu a3, 0(a3)
545 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
546 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
547 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
548 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
549 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
550 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
551 ; RV64ZVE32F-NEXT: ret
552 %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru)
556 define <4 x i8> @mgather_falsemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
557 ; RV32-LABEL: mgather_falsemask_v4i8:
559 ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
560 ; RV32-NEXT: vmv1r.v v8, v9
563 ; RV64V-LABEL: mgather_falsemask_v4i8:
565 ; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
566 ; RV64V-NEXT: vmv1r.v v8, v10
569 ; RV64ZVE32F-LABEL: mgather_falsemask_v4i8:
570 ; RV64ZVE32F: # %bb.0:
571 ; RV64ZVE32F-NEXT: ret
572 %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer, <4 x i8> %passthru)
576 declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
578 define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) {
579 ; RV32-LABEL: mgather_v8i8:
581 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
582 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
583 ; RV32-NEXT: vmv1r.v v8, v10
586 ; RV64V-LABEL: mgather_v8i8:
588 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
589 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
590 ; RV64V-NEXT: vmv1r.v v8, v12
593 ; RV64ZVE32F-LABEL: mgather_v8i8:
594 ; RV64ZVE32F: # %bb.0:
595 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
596 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
597 ; RV64ZVE32F-NEXT: andi a2, a1, 1
598 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_9
599 ; RV64ZVE32F-NEXT: # %bb.1: # %else
600 ; RV64ZVE32F-NEXT: andi a2, a1, 2
601 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_10
602 ; RV64ZVE32F-NEXT: .LBB11_2: # %else2
603 ; RV64ZVE32F-NEXT: andi a2, a1, 4
604 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_11
605 ; RV64ZVE32F-NEXT: .LBB11_3: # %else5
606 ; RV64ZVE32F-NEXT: andi a2, a1, 8
607 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_12
608 ; RV64ZVE32F-NEXT: .LBB11_4: # %else8
609 ; RV64ZVE32F-NEXT: andi a2, a1, 16
610 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_13
611 ; RV64ZVE32F-NEXT: .LBB11_5: # %else11
612 ; RV64ZVE32F-NEXT: andi a2, a1, 32
613 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_14
614 ; RV64ZVE32F-NEXT: .LBB11_6: # %else14
615 ; RV64ZVE32F-NEXT: andi a2, a1, 64
616 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_15
617 ; RV64ZVE32F-NEXT: .LBB11_7: # %else17
618 ; RV64ZVE32F-NEXT: andi a1, a1, -128
619 ; RV64ZVE32F-NEXT: bnez a1, .LBB11_16
620 ; RV64ZVE32F-NEXT: .LBB11_8: # %else20
621 ; RV64ZVE32F-NEXT: ret
622 ; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load
623 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
624 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
625 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
626 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
627 ; RV64ZVE32F-NEXT: andi a2, a1, 2
628 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
629 ; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1
630 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
631 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
632 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma
633 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
634 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
635 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
636 ; RV64ZVE32F-NEXT: andi a2, a1, 4
637 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_3
638 ; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4
639 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
640 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
641 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
642 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
643 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
644 ; RV64ZVE32F-NEXT: andi a2, a1, 8
645 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_4
646 ; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7
647 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
648 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
649 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
650 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
651 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
652 ; RV64ZVE32F-NEXT: andi a2, a1, 16
653 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_5
654 ; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10
655 ; RV64ZVE32F-NEXT: ld a2, 32(a0)
656 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
657 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
658 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
659 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
660 ; RV64ZVE32F-NEXT: andi a2, a1, 32
661 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_6
662 ; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13
663 ; RV64ZVE32F-NEXT: ld a2, 40(a0)
664 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
665 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
666 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
667 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
668 ; RV64ZVE32F-NEXT: andi a2, a1, 64
669 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_7
670 ; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16
671 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
672 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
673 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
674 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
675 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
676 ; RV64ZVE32F-NEXT: andi a1, a1, -128
677 ; RV64ZVE32F-NEXT: beqz a1, .LBB11_8
678 ; RV64ZVE32F-NEXT: .LBB11_16: # %cond.load19
679 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
680 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
681 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
682 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
683 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
684 ; RV64ZVE32F-NEXT: ret
685 %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
689 define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i8> %passthru) {
690 ; RV32-LABEL: mgather_baseidx_v8i8:
692 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
693 ; RV32-NEXT: vsext.vf4 v10, v8
694 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
695 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
696 ; RV32-NEXT: vmv1r.v v8, v9
699 ; RV64V-LABEL: mgather_baseidx_v8i8:
701 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
702 ; RV64V-NEXT: vsext.vf8 v12, v8
703 ; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
704 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
705 ; RV64V-NEXT: vmv1r.v v8, v9
708 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8:
709 ; RV64ZVE32F: # %bb.0:
710 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
711 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
712 ; RV64ZVE32F-NEXT: andi a2, a1, 1
713 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
714 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
715 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
716 ; RV64ZVE32F-NEXT: add a2, a0, a2
717 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
718 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma
719 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
720 ; RV64ZVE32F-NEXT: .LBB12_2: # %else
721 ; RV64ZVE32F-NEXT: andi a2, a1, 2
722 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_4
723 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
724 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
725 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
726 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
727 ; RV64ZVE32F-NEXT: add a2, a0, a2
728 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
729 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
730 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
731 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
732 ; RV64ZVE32F-NEXT: .LBB12_4: # %else2
733 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
734 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
735 ; RV64ZVE32F-NEXT: andi a2, a1, 4
736 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
737 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
738 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_14
739 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
740 ; RV64ZVE32F-NEXT: andi a2, a1, 8
741 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_15
742 ; RV64ZVE32F-NEXT: .LBB12_6: # %else8
743 ; RV64ZVE32F-NEXT: andi a2, a1, 16
744 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_16
745 ; RV64ZVE32F-NEXT: .LBB12_7: # %else11
746 ; RV64ZVE32F-NEXT: andi a2, a1, 32
747 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_9
748 ; RV64ZVE32F-NEXT: .LBB12_8: # %cond.load13
749 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
750 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
751 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
752 ; RV64ZVE32F-NEXT: add a2, a0, a2
753 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
754 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
755 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
756 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
757 ; RV64ZVE32F-NEXT: .LBB12_9: # %else14
758 ; RV64ZVE32F-NEXT: andi a2, a1, 64
759 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
760 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
761 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_11
762 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
763 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
764 ; RV64ZVE32F-NEXT: add a2, a0, a2
765 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
766 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
767 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
768 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
769 ; RV64ZVE32F-NEXT: .LBB12_11: # %else17
770 ; RV64ZVE32F-NEXT: andi a1, a1, -128
771 ; RV64ZVE32F-NEXT: beqz a1, .LBB12_13
772 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
773 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
774 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
775 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
776 ; RV64ZVE32F-NEXT: add a0, a0, a1
777 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
778 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
779 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
780 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
781 ; RV64ZVE32F-NEXT: .LBB12_13: # %else20
782 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
783 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
784 ; RV64ZVE32F-NEXT: ret
785 ; RV64ZVE32F-NEXT: .LBB12_14: # %cond.load4
786 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
787 ; RV64ZVE32F-NEXT: add a2, a0, a2
788 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
789 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
790 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
791 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
792 ; RV64ZVE32F-NEXT: andi a2, a1, 8
793 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_6
794 ; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load7
795 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
796 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
797 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
798 ; RV64ZVE32F-NEXT: add a2, a0, a2
799 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
800 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
801 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
802 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
803 ; RV64ZVE32F-NEXT: andi a2, a1, 16
804 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_7
805 ; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load10
806 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
807 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
808 ; RV64ZVE32F-NEXT: add a2, a0, a2
809 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
810 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
811 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
812 ; RV64ZVE32F-NEXT: andi a2, a1, 32
813 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_8
814 ; RV64ZVE32F-NEXT: j .LBB12_9
815 %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
816 %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
820 declare <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i16>)
822 define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthru) {
823 ; RV32V-LABEL: mgather_v1i16:
825 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
826 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
827 ; RV32V-NEXT: vmv1r.v v8, v9
830 ; RV64V-LABEL: mgather_v1i16:
832 ; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
833 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
834 ; RV64V-NEXT: vmv1r.v v8, v9
837 ; RV32ZVE32F-LABEL: mgather_v1i16:
838 ; RV32ZVE32F: # %bb.0:
839 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
840 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
841 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
842 ; RV32ZVE32F-NEXT: ret
844 ; RV64ZVE32F-LABEL: mgather_v1i16:
845 ; RV64ZVE32F: # %bb.0:
846 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
847 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
848 ; RV64ZVE32F-NEXT: bnez a1, .LBB13_2
849 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
850 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
851 ; RV64ZVE32F-NEXT: vle16.v v8, (a0)
852 ; RV64ZVE32F-NEXT: .LBB13_2: # %else
853 ; RV64ZVE32F-NEXT: ret
854 %v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
858 declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
860 define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
861 ; RV32V-LABEL: mgather_v2i16:
863 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
864 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
865 ; RV32V-NEXT: vmv1r.v v8, v9
868 ; RV64V-LABEL: mgather_v2i16:
870 ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
871 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
872 ; RV64V-NEXT: vmv1r.v v8, v9
875 ; RV32ZVE32F-LABEL: mgather_v2i16:
876 ; RV32ZVE32F: # %bb.0:
877 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
878 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
879 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
880 ; RV32ZVE32F-NEXT: ret
882 ; RV64ZVE32F-LABEL: mgather_v2i16:
883 ; RV64ZVE32F: # %bb.0:
884 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
885 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
886 ; RV64ZVE32F-NEXT: andi a3, a2, 1
887 ; RV64ZVE32F-NEXT: bnez a3, .LBB14_3
888 ; RV64ZVE32F-NEXT: # %bb.1: # %else
889 ; RV64ZVE32F-NEXT: andi a2, a2, 2
890 ; RV64ZVE32F-NEXT: bnez a2, .LBB14_4
891 ; RV64ZVE32F-NEXT: .LBB14_2: # %else2
892 ; RV64ZVE32F-NEXT: ret
893 ; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load
894 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
895 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
896 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
897 ; RV64ZVE32F-NEXT: andi a2, a2, 2
898 ; RV64ZVE32F-NEXT: beqz a2, .LBB14_2
899 ; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1
900 ; RV64ZVE32F-NEXT: lh a0, 0(a1)
901 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
902 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
903 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
904 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
905 ; RV64ZVE32F-NEXT: ret
906 %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
910 define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
911 ; RV32V-LABEL: mgather_v2i16_sextload_v2i32:
913 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
914 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
915 ; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
916 ; RV32V-NEXT: vsext.vf2 v8, v9
919 ; RV64V-LABEL: mgather_v2i16_sextload_v2i32:
921 ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
922 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
923 ; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
924 ; RV64V-NEXT: vsext.vf2 v8, v9
927 ; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
928 ; RV32ZVE32F: # %bb.0:
929 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
930 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
931 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
932 ; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
933 ; RV32ZVE32F-NEXT: ret
935 ; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
936 ; RV64ZVE32F: # %bb.0:
937 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
938 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
939 ; RV64ZVE32F-NEXT: andi a3, a2, 1
940 ; RV64ZVE32F-NEXT: beqz a3, .LBB15_2
941 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
942 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
943 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
944 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
945 ; RV64ZVE32F-NEXT: .LBB15_2: # %else
946 ; RV64ZVE32F-NEXT: andi a2, a2, 2
947 ; RV64ZVE32F-NEXT: beqz a2, .LBB15_4
948 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
949 ; RV64ZVE32F-NEXT: lh a0, 0(a1)
950 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
951 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
952 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
953 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
954 ; RV64ZVE32F-NEXT: .LBB15_4: # %else2
955 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
956 ; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
957 ; RV64ZVE32F-NEXT: vmv.v.v v8, v9
958 ; RV64ZVE32F-NEXT: ret
959 %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
960 %ev = sext <2 x i16> %v to <2 x i32>
964 define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
965 ; RV32V-LABEL: mgather_v2i16_zextload_v2i32:
967 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
968 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
969 ; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
970 ; RV32V-NEXT: vzext.vf2 v8, v9
973 ; RV64V-LABEL: mgather_v2i16_zextload_v2i32:
975 ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
976 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
977 ; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
978 ; RV64V-NEXT: vzext.vf2 v8, v9
981 ; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
982 ; RV32ZVE32F: # %bb.0:
983 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
984 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
985 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
986 ; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
987 ; RV32ZVE32F-NEXT: ret
989 ; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
990 ; RV64ZVE32F: # %bb.0:
991 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
992 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
993 ; RV64ZVE32F-NEXT: andi a3, a2, 1
994 ; RV64ZVE32F-NEXT: beqz a3, .LBB16_2
995 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
996 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
997 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
998 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
999 ; RV64ZVE32F-NEXT: .LBB16_2: # %else
1000 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1001 ; RV64ZVE32F-NEXT: beqz a2, .LBB16_4
1002 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1003 ; RV64ZVE32F-NEXT: lh a0, 0(a1)
1004 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1005 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
1006 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1007 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
1008 ; RV64ZVE32F-NEXT: .LBB16_4: # %else2
1009 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1010 ; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
1011 ; RV64ZVE32F-NEXT: vmv.v.v v8, v9
1012 ; RV64ZVE32F-NEXT: ret
1013 %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
1014 %ev = zext <2 x i16> %v to <2 x i32>
1018 define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
1019 ; RV32V-LABEL: mgather_v2i16_sextload_v2i64:
1021 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
1022 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
1023 ; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1024 ; RV32V-NEXT: vsext.vf4 v8, v9
1027 ; RV64V-LABEL: mgather_v2i16_sextload_v2i64:
1029 ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
1030 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
1031 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1032 ; RV64V-NEXT: vsext.vf4 v8, v9
1035 ; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
1036 ; RV32ZVE32F: # %bb.0:
1037 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
1038 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
1039 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
1040 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
1041 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
1042 ; RV32ZVE32F-NEXT: srai a3, a1, 31
1043 ; RV32ZVE32F-NEXT: srai a4, a2, 31
1044 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
1045 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
1046 ; RV32ZVE32F-NEXT: sw a2, 8(a0)
1047 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
1048 ; RV32ZVE32F-NEXT: ret
1050 ; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
1051 ; RV64ZVE32F: # %bb.0:
1052 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1053 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
1054 ; RV64ZVE32F-NEXT: andi a3, a2, 1
1055 ; RV64ZVE32F-NEXT: beqz a3, .LBB17_2
1056 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1057 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1058 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
1059 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1060 ; RV64ZVE32F-NEXT: .LBB17_2: # %else
1061 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1062 ; RV64ZVE32F-NEXT: beqz a2, .LBB17_4
1063 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1064 ; RV64ZVE32F-NEXT: lh a0, 0(a1)
1065 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1066 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
1067 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1068 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
1069 ; RV64ZVE32F-NEXT: .LBB17_4: # %else2
1070 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1071 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
1072 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1073 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
1074 ; RV64ZVE32F-NEXT: ret
1075 %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
1076 %ev = sext <2 x i16> %v to <2 x i64>
1080 define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
1081 ; RV32V-LABEL: mgather_v2i16_zextload_v2i64:
1083 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
1084 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
1085 ; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1086 ; RV32V-NEXT: vzext.vf4 v8, v9
1089 ; RV64V-LABEL: mgather_v2i16_zextload_v2i64:
1091 ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
1092 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
1093 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1094 ; RV64V-NEXT: vzext.vf4 v8, v9
1097 ; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
1098 ; RV32ZVE32F: # %bb.0:
1099 ; RV32ZVE32F-NEXT: lui a1, 16
1100 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
1101 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
1102 ; RV32ZVE32F-NEXT: addi a1, a1, -1
1103 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
1104 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
1105 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
1106 ; RV32ZVE32F-NEXT: and a2, a2, a1
1107 ; RV32ZVE32F-NEXT: and a1, a3, a1
1108 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
1109 ; RV32ZVE32F-NEXT: sw zero, 4(a0)
1110 ; RV32ZVE32F-NEXT: sw a1, 8(a0)
1111 ; RV32ZVE32F-NEXT: sw zero, 12(a0)
1112 ; RV32ZVE32F-NEXT: ret
1114 ; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
1115 ; RV64ZVE32F: # %bb.0:
1116 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1117 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
1118 ; RV64ZVE32F-NEXT: andi a3, a2, 1
1119 ; RV64ZVE32F-NEXT: beqz a3, .LBB18_2
1120 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1121 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1122 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
1123 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1124 ; RV64ZVE32F-NEXT: .LBB18_2: # %else
1125 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1126 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
1127 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1128 ; RV64ZVE32F-NEXT: lh a0, 0(a1)
1129 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1130 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
1131 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1132 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
1133 ; RV64ZVE32F-NEXT: .LBB18_4: # %else2
1134 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1135 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
1136 ; RV64ZVE32F-NEXT: lui a1, 16
1137 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1138 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
1139 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1140 ; RV64ZVE32F-NEXT: and a0, a0, a1
1141 ; RV64ZVE32F-NEXT: and a1, a2, a1
1142 ; RV64ZVE32F-NEXT: ret
1143 %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
1144 %ev = zext <2 x i16> %v to <2 x i64>
1148 declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
1150 define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthru) {
1151 ; RV32-LABEL: mgather_v4i16:
1153 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
1154 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
1155 ; RV32-NEXT: vmv1r.v v8, v9
1158 ; RV64V-LABEL: mgather_v4i16:
1160 ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
1161 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
1162 ; RV64V-NEXT: vmv1r.v v8, v10
1165 ; RV64ZVE32F-LABEL: mgather_v4i16:
1166 ; RV64ZVE32F: # %bb.0:
1167 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1168 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1169 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1170 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_5
1171 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1172 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1173 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_6
1174 ; RV64ZVE32F-NEXT: .LBB19_2: # %else2
1175 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1176 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_7
1177 ; RV64ZVE32F-NEXT: .LBB19_3: # %else5
1178 ; RV64ZVE32F-NEXT: andi a1, a1, 8
1179 ; RV64ZVE32F-NEXT: bnez a1, .LBB19_8
1180 ; RV64ZVE32F-NEXT: .LBB19_4: # %else8
1181 ; RV64ZVE32F-NEXT: ret
1182 ; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load
1183 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
1184 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1185 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
1186 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1187 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1188 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
1189 ; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1
1190 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
1191 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1192 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1193 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1194 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
1195 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
1196 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1197 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_3
1198 ; RV64ZVE32F-NEXT: .LBB19_7: # %cond.load4
1199 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
1200 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1201 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
1202 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1203 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
1204 ; RV64ZVE32F-NEXT: andi a1, a1, 8
1205 ; RV64ZVE32F-NEXT: beqz a1, .LBB19_4
1206 ; RV64ZVE32F-NEXT: .LBB19_8: # %cond.load7
1207 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
1208 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1209 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1210 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
1211 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
1212 ; RV64ZVE32F-NEXT: ret
1213 %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x i16> %passthru)
1217 define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
1218 ; RV32-LABEL: mgather_truemask_v4i16:
1220 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1221 ; RV32-NEXT: vluxei32.v v9, (zero), v8
1222 ; RV32-NEXT: vmv1r.v v8, v9
1225 ; RV64V-LABEL: mgather_truemask_v4i16:
1227 ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1228 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
1229 ; RV64V-NEXT: vmv1r.v v8, v10
1232 ; RV64ZVE32F-LABEL: mgather_truemask_v4i16:
1233 ; RV64ZVE32F: # %bb.0:
1234 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
1235 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
1236 ; RV64ZVE32F-NEXT: ld a3, 16(a0)
1237 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
1238 ; RV64ZVE32F-NEXT: lh a1, 0(a1)
1239 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1240 ; RV64ZVE32F-NEXT: lh a3, 0(a3)
1241 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1242 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1243 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
1244 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
1245 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
1246 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
1247 ; RV64ZVE32F-NEXT: ret
1248 %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru)
1252 define <4 x i16> @mgather_falsemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
1253 ; RV32-LABEL: mgather_falsemask_v4i16:
1255 ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1256 ; RV32-NEXT: vmv1r.v v8, v9
1259 ; RV64V-LABEL: mgather_falsemask_v4i16:
1261 ; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1262 ; RV64V-NEXT: vmv1r.v v8, v10
1265 ; RV64ZVE32F-LABEL: mgather_falsemask_v4i16:
1266 ; RV64ZVE32F: # %bb.0:
1267 ; RV64ZVE32F-NEXT: ret
1268 %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x i16> %passthru)
1272 declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
1274 define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthru) {
1275 ; RV32-LABEL: mgather_v8i16:
1277 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
1278 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
1279 ; RV32-NEXT: vmv.v.v v8, v10
1282 ; RV64V-LABEL: mgather_v8i16:
1284 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
1285 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
1286 ; RV64V-NEXT: vmv.v.v v8, v12
1289 ; RV64ZVE32F-LABEL: mgather_v8i16:
1290 ; RV64ZVE32F: # %bb.0:
1291 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1292 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1293 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1294 ; RV64ZVE32F-NEXT: bnez a2, .LBB22_9
1295 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1296 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1297 ; RV64ZVE32F-NEXT: bnez a2, .LBB22_10
1298 ; RV64ZVE32F-NEXT: .LBB22_2: # %else2
1299 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1300 ; RV64ZVE32F-NEXT: bnez a2, .LBB22_11
1301 ; RV64ZVE32F-NEXT: .LBB22_3: # %else5
1302 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1303 ; RV64ZVE32F-NEXT: bnez a2, .LBB22_12
1304 ; RV64ZVE32F-NEXT: .LBB22_4: # %else8
1305 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1306 ; RV64ZVE32F-NEXT: bnez a2, .LBB22_13
1307 ; RV64ZVE32F-NEXT: .LBB22_5: # %else11
1308 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1309 ; RV64ZVE32F-NEXT: bnez a2, .LBB22_14
1310 ; RV64ZVE32F-NEXT: .LBB22_6: # %else14
1311 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1312 ; RV64ZVE32F-NEXT: bnez a2, .LBB22_15
1313 ; RV64ZVE32F-NEXT: .LBB22_7: # %else17
1314 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1315 ; RV64ZVE32F-NEXT: bnez a1, .LBB22_16
1316 ; RV64ZVE32F-NEXT: .LBB22_8: # %else20
1317 ; RV64ZVE32F-NEXT: ret
1318 ; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load
1319 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
1320 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1321 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
1322 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1323 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1324 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_2
1325 ; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1
1326 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
1327 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1328 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1329 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1330 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
1331 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
1332 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1333 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_3
1334 ; RV64ZVE32F-NEXT: .LBB22_11: # %cond.load4
1335 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
1336 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1337 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
1338 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1339 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
1340 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1341 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_4
1342 ; RV64ZVE32F-NEXT: .LBB22_12: # %cond.load7
1343 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
1344 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1345 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
1346 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1347 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
1348 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1349 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_5
1350 ; RV64ZVE32F-NEXT: .LBB22_13: # %cond.load10
1351 ; RV64ZVE32F-NEXT: ld a2, 32(a0)
1352 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1353 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
1354 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1355 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
1356 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1357 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_6
1358 ; RV64ZVE32F-NEXT: .LBB22_14: # %cond.load13
1359 ; RV64ZVE32F-NEXT: ld a2, 40(a0)
1360 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1361 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
1362 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1363 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
1364 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1365 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_7
1366 ; RV64ZVE32F-NEXT: .LBB22_15: # %cond.load16
1367 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
1368 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1369 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
1370 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1371 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
1372 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1373 ; RV64ZVE32F-NEXT: beqz a1, .LBB22_8
1374 ; RV64ZVE32F-NEXT: .LBB22_16: # %cond.load19
1375 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
1376 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1377 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1378 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
1379 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
1380 ; RV64ZVE32F-NEXT: ret
1381 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
1385 define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1386 ; RV32-LABEL: mgather_baseidx_v8i8_v8i16:
1388 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1389 ; RV32-NEXT: vsext.vf4 v10, v8
1390 ; RV32-NEXT: vadd.vv v10, v10, v10
1391 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1392 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
1393 ; RV32-NEXT: vmv.v.v v8, v9
1396 ; RV64V-LABEL: mgather_baseidx_v8i8_v8i16:
1398 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1399 ; RV64V-NEXT: vsext.vf8 v12, v8
1400 ; RV64V-NEXT: vadd.vv v12, v12, v12
1401 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1402 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
1403 ; RV64V-NEXT: vmv.v.v v8, v9
1406 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i16:
1407 ; RV64ZVE32F: # %bb.0:
1408 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1409 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1410 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1411 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
1412 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1413 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1414 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1415 ; RV64ZVE32F-NEXT: add a2, a0, a2
1416 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1417 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
1418 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1419 ; RV64ZVE32F-NEXT: .LBB23_2: # %else
1420 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1421 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_4
1422 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1423 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1424 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1425 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1426 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1427 ; RV64ZVE32F-NEXT: add a2, a0, a2
1428 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1429 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1430 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1431 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
1432 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
1433 ; RV64ZVE32F-NEXT: .LBB23_4: # %else2
1434 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1435 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1436 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1437 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1438 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
1439 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_14
1440 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
1441 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1442 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_15
1443 ; RV64ZVE32F-NEXT: .LBB23_6: # %else8
1444 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1445 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_16
1446 ; RV64ZVE32F-NEXT: .LBB23_7: # %else11
1447 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1448 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_9
1449 ; RV64ZVE32F-NEXT: .LBB23_8: # %cond.load13
1450 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1451 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
1452 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1453 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1454 ; RV64ZVE32F-NEXT: add a2, a0, a2
1455 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1456 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1457 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1458 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
1459 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
1460 ; RV64ZVE32F-NEXT: .LBB23_9: # %else14
1461 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1462 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1463 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
1464 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_11
1465 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
1466 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1467 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1468 ; RV64ZVE32F-NEXT: add a2, a0, a2
1469 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1470 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1471 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1472 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
1473 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
1474 ; RV64ZVE32F-NEXT: .LBB23_11: # %else17
1475 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1476 ; RV64ZVE32F-NEXT: beqz a1, .LBB23_13
1477 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
1478 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1479 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1480 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
1481 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1482 ; RV64ZVE32F-NEXT: add a0, a0, a1
1483 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1484 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1485 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1486 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1487 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
1488 ; RV64ZVE32F-NEXT: .LBB23_13: # %else20
1489 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1490 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1491 ; RV64ZVE32F-NEXT: ret
1492 ; RV64ZVE32F-NEXT: .LBB23_14: # %cond.load4
1493 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1494 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1495 ; RV64ZVE32F-NEXT: add a2, a0, a2
1496 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1497 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1498 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
1499 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
1500 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
1501 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1502 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_6
1503 ; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load7
1504 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1505 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1506 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1507 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1508 ; RV64ZVE32F-NEXT: add a2, a0, a2
1509 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1510 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1511 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1512 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
1513 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
1514 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1515 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_7
1516 ; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load10
1517 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1518 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1519 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1520 ; RV64ZVE32F-NEXT: add a2, a0, a2
1521 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1522 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1523 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1524 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
1525 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
1526 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1527 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_8
1528 ; RV64ZVE32F-NEXT: j .LBB23_9
1529 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
1530 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
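; Same as above, but the <8 x i8> indices are explicitly sign-extended to i16
; before the GEP; codegen matches the direct i8-index case.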
1534 define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1535 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1537 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1538 ; RV32-NEXT: vsext.vf4 v10, v8
1539 ; RV32-NEXT: vadd.vv v10, v10, v10
1540 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1541 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
1542 ; RV32-NEXT: vmv.v.v v8, v9
1545 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1547 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1548 ; RV64V-NEXT: vsext.vf8 v12, v8
1549 ; RV64V-NEXT: vadd.vv v12, v12, v12
1550 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1551 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
1552 ; RV64V-NEXT: vmv.v.v v8, v9
1555 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1556 ; RV64ZVE32F: # %bb.0:
1557 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1558 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1559 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1560 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_2
1561 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1562 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1563 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1564 ; RV64ZVE32F-NEXT: add a2, a0, a2
1565 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1566 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
1567 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1568 ; RV64ZVE32F-NEXT: .LBB24_2: # %else
1569 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1570 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_4
1571 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1572 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1573 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1574 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1575 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1576 ; RV64ZVE32F-NEXT: add a2, a0, a2
1577 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1578 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1579 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1580 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
1581 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
1582 ; RV64ZVE32F-NEXT: .LBB24_4: # %else2
1583 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1584 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1585 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1586 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1587 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
1588 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_14
1589 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
1590 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1591 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_15
1592 ; RV64ZVE32F-NEXT: .LBB24_6: # %else8
1593 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1594 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_16
1595 ; RV64ZVE32F-NEXT: .LBB24_7: # %else11
1596 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1597 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_9
1598 ; RV64ZVE32F-NEXT: .LBB24_8: # %cond.load13
1599 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1600 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
1601 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1602 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1603 ; RV64ZVE32F-NEXT: add a2, a0, a2
1604 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1605 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1606 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1607 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
1608 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
1609 ; RV64ZVE32F-NEXT: .LBB24_9: # %else14
1610 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1611 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1612 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
1613 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_11
1614 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
1615 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1616 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1617 ; RV64ZVE32F-NEXT: add a2, a0, a2
1618 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1619 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1620 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1621 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
1622 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
1623 ; RV64ZVE32F-NEXT: .LBB24_11: # %else17
1624 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1625 ; RV64ZVE32F-NEXT: beqz a1, .LBB24_13
1626 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
1627 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1628 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1629 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
1630 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1631 ; RV64ZVE32F-NEXT: add a0, a0, a1
1632 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1633 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1634 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1635 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1636 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
1637 ; RV64ZVE32F-NEXT: .LBB24_13: # %else20
1638 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1639 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1640 ; RV64ZVE32F-NEXT: ret
1641 ; RV64ZVE32F-NEXT: .LBB24_14: # %cond.load4
1642 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1643 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1644 ; RV64ZVE32F-NEXT: add a2, a0, a2
1645 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1646 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1647 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
1648 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
1649 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
1650 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1651 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_6
1652 ; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load7
1653 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1654 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1655 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1656 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1657 ; RV64ZVE32F-NEXT: add a2, a0, a2
1658 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1659 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1660 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1661 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
1662 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
1663 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1664 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_7
1665 ; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load10
1666 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1667 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1668 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1669 ; RV64ZVE32F-NEXT: add a2, a0, a2
1670 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1671 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1672 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1673 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
1674 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
1675 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1676 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_8
1677 ; RV64ZVE32F-NEXT: j .LBB24_9
1678 %eidxs = sext <8 x i8> %idxs to <8 x i16>
1679 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1680 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
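; Same as above with zero-extended indices; the unsigned widening add
; (vwaddu.vv) both extends and doubles them, so vluxei16 can be used.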
1684 define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1685 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1687 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1688 ; RV32-NEXT: vwaddu.vv v10, v8, v8
1689 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1690 ; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
1691 ; RV32-NEXT: vmv.v.v v8, v9
1694 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1696 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1697 ; RV64V-NEXT: vwaddu.vv v10, v8, v8
1698 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1699 ; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
1700 ; RV64V-NEXT: vmv.v.v v8, v9
1703 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1704 ; RV64ZVE32F: # %bb.0:
1705 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1706 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1707 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1708 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_2
1709 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1710 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1711 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1712 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1713 ; RV64ZVE32F-NEXT: add a2, a0, a2
1714 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1715 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
1716 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1717 ; RV64ZVE32F-NEXT: .LBB25_2: # %else
1718 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1719 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_4
1720 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1721 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1722 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1723 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1724 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1725 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1726 ; RV64ZVE32F-NEXT: add a2, a0, a2
1727 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1728 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1729 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1730 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
1731 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
1732 ; RV64ZVE32F-NEXT: .LBB25_4: # %else2
1733 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1734 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1735 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1736 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1737 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
1738 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_14
1739 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
1740 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1741 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_15
1742 ; RV64ZVE32F-NEXT: .LBB25_6: # %else8
1743 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1744 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_16
1745 ; RV64ZVE32F-NEXT: .LBB25_7: # %else11
1746 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1747 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_9
1748 ; RV64ZVE32F-NEXT: .LBB25_8: # %cond.load13
1749 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1750 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
1751 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1752 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1753 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1754 ; RV64ZVE32F-NEXT: add a2, a0, a2
1755 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1756 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1757 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1758 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
1759 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
1760 ; RV64ZVE32F-NEXT: .LBB25_9: # %else14
1761 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1762 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1763 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
1764 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_11
1765 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
1766 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1767 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1768 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1769 ; RV64ZVE32F-NEXT: add a2, a0, a2
1770 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1771 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1772 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1773 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
1774 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
1775 ; RV64ZVE32F-NEXT: .LBB25_11: # %else17
1776 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1777 ; RV64ZVE32F-NEXT: beqz a1, .LBB25_13
1778 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
1779 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1780 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1781 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
1782 ; RV64ZVE32F-NEXT: andi a1, a1, 255
1783 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1784 ; RV64ZVE32F-NEXT: add a0, a0, a1
1785 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1786 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1787 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1788 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1789 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
1790 ; RV64ZVE32F-NEXT: .LBB25_13: # %else20
1791 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1792 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1793 ; RV64ZVE32F-NEXT: ret
1794 ; RV64ZVE32F-NEXT: .LBB25_14: # %cond.load4
1795 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1796 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1797 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1798 ; RV64ZVE32F-NEXT: add a2, a0, a2
1799 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1800 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1801 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
1802 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
1803 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
1804 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1805 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_6
1806 ; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load7
1807 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1808 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1809 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1810 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1811 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1812 ; RV64ZVE32F-NEXT: add a2, a0, a2
1813 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1814 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1815 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1816 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
1817 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
1818 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1819 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_7
1820 ; RV64ZVE32F-NEXT: .LBB25_16: # %cond.load10
1821 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1822 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1823 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1824 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1825 ; RV64ZVE32F-NEXT: add a2, a0, a2
1826 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1827 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1828 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1829 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
1830 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
1831 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1832 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_8
1833 ; RV64ZVE32F-NEXT: j .LBB25_9
1834 %eidxs = zext <8 x i8> %idxs to <8 x i16>
1835 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1836 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
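; <8 x i16> gather with native <8 x i16> indices; RV32 widens and doubles them
; in a single vwadd.vv.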
1840 define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1841 ; RV32-LABEL: mgather_baseidx_v8i16:
1843 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
1844 ; RV32-NEXT: vwadd.vv v10, v8, v8
1845 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
1846 ; RV32-NEXT: vmv.v.v v8, v9
1849 ; RV64V-LABEL: mgather_baseidx_v8i16:
1851 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1852 ; RV64V-NEXT: vsext.vf4 v12, v8
1853 ; RV64V-NEXT: vadd.vv v12, v12, v12
1854 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1855 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
1856 ; RV64V-NEXT: vmv.v.v v8, v9
1859 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16:
1860 ; RV64ZVE32F: # %bb.0:
1861 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1862 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1863 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1864 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_2
1865 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1866 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
1867 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1868 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1869 ; RV64ZVE32F-NEXT: add a2, a0, a2
1870 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1871 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1872 ; RV64ZVE32F-NEXT: .LBB26_2: # %else
1873 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1874 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_4
1875 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1876 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1877 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1878 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1879 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1880 ; RV64ZVE32F-NEXT: add a2, a0, a2
1881 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1882 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1883 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
1884 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
1885 ; RV64ZVE32F-NEXT: .LBB26_4: # %else2
1886 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
1887 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1888 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1889 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1890 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
1891 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_14
1892 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
1893 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1894 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_15
1895 ; RV64ZVE32F-NEXT: .LBB26_6: # %else8
1896 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1897 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_16
1898 ; RV64ZVE32F-NEXT: .LBB26_7: # %else11
1899 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1900 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_9
1901 ; RV64ZVE32F-NEXT: .LBB26_8: # %cond.load13
1902 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1903 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
1904 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1905 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1906 ; RV64ZVE32F-NEXT: add a2, a0, a2
1907 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1908 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1909 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
1910 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
1911 ; RV64ZVE32F-NEXT: .LBB26_9: # %else14
1912 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1913 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1914 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
1915 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_11
1916 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
1917 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1918 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1919 ; RV64ZVE32F-NEXT: add a2, a0, a2
1920 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1921 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1922 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
1923 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
1924 ; RV64ZVE32F-NEXT: .LBB26_11: # %else17
1925 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1926 ; RV64ZVE32F-NEXT: beqz a1, .LBB26_13
1927 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
1928 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1929 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1930 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
1931 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1932 ; RV64ZVE32F-NEXT: add a0, a0, a1
1933 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1934 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1935 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1936 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
1937 ; RV64ZVE32F-NEXT: .LBB26_13: # %else20
1938 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1939 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1940 ; RV64ZVE32F-NEXT: ret
1941 ; RV64ZVE32F-NEXT: .LBB26_14: # %cond.load4
1942 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1943 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1944 ; RV64ZVE32F-NEXT: add a2, a0, a2
1945 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1946 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
1947 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
1948 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
1949 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1950 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_6
1951 ; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load7
1952 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1953 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1954 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1955 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1956 ; RV64ZVE32F-NEXT: add a2, a0, a2
1957 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1958 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1959 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
1960 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
1961 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1962 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_7
1963 ; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load10
1964 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
1965 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1966 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1967 ; RV64ZVE32F-NEXT: add a2, a0, a2
1968 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1969 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1970 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
1971 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1972 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_8
1973 ; RV64ZVE32F-NEXT: j .LBB26_9
1974 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
1975 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
1979 declare <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i32>)
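; Single-element <1 x i32> gather; RV64ZVE32F tests the mask bit with vfirst.m
; and conditionally executes an unmasked vle32.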
1981 define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthru) {
1982 ; RV32V-LABEL: mgather_v1i32:
1984 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
1985 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
1986 ; RV32V-NEXT: vmv1r.v v8, v9
1989 ; RV64V-LABEL: mgather_v1i32:
1991 ; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
1992 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
1993 ; RV64V-NEXT: vmv1r.v v8, v9
1996 ; RV32ZVE32F-LABEL: mgather_v1i32:
1997 ; RV32ZVE32F: # %bb.0:
1998 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
1999 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
2000 ; RV32ZVE32F-NEXT: vmv.v.v v8, v9
2001 ; RV32ZVE32F-NEXT: ret
2003 ; RV64ZVE32F-LABEL: mgather_v1i32:
2004 ; RV64ZVE32F: # %bb.0:
2005 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
2006 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
2007 ; RV64ZVE32F-NEXT: bnez a1, .LBB27_2
2008 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2009 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2010 ; RV64ZVE32F-NEXT: vle32.v v8, (a0)
2011 ; RV64ZVE32F-NEXT: .LBB27_2: # %else
2012 ; RV64ZVE32F-NEXT: ret
2013 %v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
2017 declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
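; <2 x i32> gather; RV64ZVE32F receives the two pointers in scalar registers
; and branches on each mask bit.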
2019 define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2020 ; RV32V-LABEL: mgather_v2i32:
2022 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2023 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
2024 ; RV32V-NEXT: vmv1r.v v8, v9
2027 ; RV64V-LABEL: mgather_v2i32:
2029 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2030 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
2031 ; RV64V-NEXT: vmv1r.v v8, v9
2034 ; RV32ZVE32F-LABEL: mgather_v2i32:
2035 ; RV32ZVE32F: # %bb.0:
2036 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
2037 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
2038 ; RV32ZVE32F-NEXT: vmv.v.v v8, v9
2039 ; RV32ZVE32F-NEXT: ret
2041 ; RV64ZVE32F-LABEL: mgather_v2i32:
2042 ; RV64ZVE32F: # %bb.0:
2043 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2044 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2045 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2046 ; RV64ZVE32F-NEXT: bnez a3, .LBB28_3
2047 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2048 ; RV64ZVE32F-NEXT: andi a2, a2, 2
2049 ; RV64ZVE32F-NEXT: bnez a2, .LBB28_4
2050 ; RV64ZVE32F-NEXT: .LBB28_2: # %else2
2051 ; RV64ZVE32F-NEXT: ret
2052 ; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load
2053 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2054 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2055 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2056 ; RV64ZVE32F-NEXT: andi a2, a2, 2
2057 ; RV64ZVE32F-NEXT: beqz a2, .LBB28_2
2058 ; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1
2059 ; RV64ZVE32F-NEXT: lw a0, 0(a1)
2060 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2061 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2062 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2063 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2064 ; RV64ZVE32F-NEXT: ret
2065 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
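; Gather <2 x i32> and sign-extend the result to <2 x i64>.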
2069 define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2070 ; RV32V-LABEL: mgather_v2i32_sextload_v2i64:
2072 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2073 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
2074 ; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
2075 ; RV32V-NEXT: vsext.vf2 v8, v9
2078 ; RV64V-LABEL: mgather_v2i32_sextload_v2i64:
2080 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2081 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
2082 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
2083 ; RV64V-NEXT: vsext.vf2 v8, v9
2086 ; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
2087 ; RV32ZVE32F: # %bb.0:
2088 ; RV32ZVE32F-NEXT: addi a1, a0, 8
2089 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
2090 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
2091 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2092 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
2093 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2094 ; RV32ZVE32F-NEXT: vse32.v v9, (a0)
2095 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
2096 ; RV32ZVE32F-NEXT: srai a2, a2, 31
2097 ; RV32ZVE32F-NEXT: vse32.v v8, (a1)
2098 ; RV32ZVE32F-NEXT: srai a3, a3, 31
2099 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
2100 ; RV32ZVE32F-NEXT: sw a3, 12(a0)
2101 ; RV32ZVE32F-NEXT: ret
2103 ; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
2104 ; RV64ZVE32F: # %bb.0:
2105 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2106 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2107 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2108 ; RV64ZVE32F-NEXT: beqz a3, .LBB29_2
2109 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2110 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2111 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2112 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2113 ; RV64ZVE32F-NEXT: .LBB29_2: # %else
2114 ; RV64ZVE32F-NEXT: andi a2, a2, 2
2115 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4
2116 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2117 ; RV64ZVE32F-NEXT: lw a0, 0(a1)
2118 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2119 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2120 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2121 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2122 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2
2123 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2124 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
2125 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2126 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2127 ; RV64ZVE32F-NEXT: ret
2128 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
2129 %ev = sext <2 x i32> %v to <2 x i64>
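; Gather <2 x i32> and zero-extend the result to <2 x i64>.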
2133 define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2134 ; RV32V-LABEL: mgather_v2i32_zextload_v2i64:
2136 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2137 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
2138 ; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
2139 ; RV32V-NEXT: vzext.vf2 v8, v9
2142 ; RV64V-LABEL: mgather_v2i32_zextload_v2i64:
2144 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2145 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
2146 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
2147 ; RV64V-NEXT: vzext.vf2 v8, v9
2150 ; RV32ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
2151 ; RV32ZVE32F: # %bb.0:
2152 ; RV32ZVE32F-NEXT: addi a1, a0, 8
2153 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
2154 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
2155 ; RV32ZVE32F-NEXT: sw zero, 4(a0)
2156 ; RV32ZVE32F-NEXT: sw zero, 12(a0)
2157 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2158 ; RV32ZVE32F-NEXT: vse32.v v9, (a0)
2159 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2160 ; RV32ZVE32F-NEXT: vse32.v v8, (a1)
2161 ; RV32ZVE32F-NEXT: ret
2163 ; RV64ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
2164 ; RV64ZVE32F: # %bb.0:
2165 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2166 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2167 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2168 ; RV64ZVE32F-NEXT: beqz a3, .LBB30_2
2169 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2170 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2171 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2172 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2173 ; RV64ZVE32F-NEXT: .LBB30_2: # %else
2174 ; RV64ZVE32F-NEXT: andi a2, a2, 2
2175 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4
2176 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2177 ; RV64ZVE32F-NEXT: lw a0, 0(a1)
2178 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2179 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2180 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2181 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2182 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2
2183 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2184 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
2185 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2186 ; RV64ZVE32F-NEXT: slli a0, a0, 32
2187 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2188 ; RV64ZVE32F-NEXT: srli a0, a0, 32
2189 ; RV64ZVE32F-NEXT: slli a1, a1, 32
2190 ; RV64ZVE32F-NEXT: srli a1, a1, 32
2191 ; RV64ZVE32F-NEXT: ret
2192 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
2193 %ev = zext <2 x i32> %v to <2 x i64>
2197 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
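; <4 x i32> gather; without 64-bit vector elements the <4 x ptr> operand is
; passed in memory and each pointer is reloaded with ld.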
2199 define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthru) {
2200 ; RV32-LABEL: mgather_v4i32:
2202 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
2203 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
2204 ; RV32-NEXT: vmv.v.v v8, v9
2207 ; RV64V-LABEL: mgather_v4i32:
2209 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu
2210 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
2211 ; RV64V-NEXT: vmv.v.v v8, v10
2214 ; RV64ZVE32F-LABEL: mgather_v4i32:
2215 ; RV64ZVE32F: # %bb.0:
2216 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2217 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2218 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2219 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_5
2220 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2221 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2222 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_6
2223 ; RV64ZVE32F-NEXT: .LBB31_2: # %else2
2224 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2225 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_7
2226 ; RV64ZVE32F-NEXT: .LBB31_3: # %else5
2227 ; RV64ZVE32F-NEXT: andi a1, a1, 8
2228 ; RV64ZVE32F-NEXT: bnez a1, .LBB31_8
2229 ; RV64ZVE32F-NEXT: .LBB31_4: # %else8
2230 ; RV64ZVE32F-NEXT: ret
2231 ; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load
2232 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
2233 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2234 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2235 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2236 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2237 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
2238 ; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1
2239 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
2240 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2241 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2242 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2243 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2244 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2245 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2246 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_3
2247 ; RV64ZVE32F-NEXT: .LBB31_7: # %cond.load4
2248 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
2249 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2250 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2251 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2252 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
2253 ; RV64ZVE32F-NEXT: andi a1, a1, 8
2254 ; RV64ZVE32F-NEXT: beqz a1, .LBB31_4
2255 ; RV64ZVE32F-NEXT: .LBB31_8: # %cond.load7
2256 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
2257 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2258 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2259 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2260 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
2261 ; RV64ZVE32F-NEXT: ret
2262 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> %passthru)
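; All-ones mask: the gather becomes unconditional and the passthru is unused.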
2266 define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
2267 ; RV32-LABEL: mgather_truemask_v4i32:
2269 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2270 ; RV32-NEXT: vluxei32.v v8, (zero), v8
2273 ; RV64V-LABEL: mgather_truemask_v4i32:
2275 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2276 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
2277 ; RV64V-NEXT: vmv.v.v v8, v10
2280 ; RV64ZVE32F-LABEL: mgather_truemask_v4i32:
2281 ; RV64ZVE32F: # %bb.0:
2282 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
2283 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
2284 ; RV64ZVE32F-NEXT: ld a3, 16(a0)
2285 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
2286 ; RV64ZVE32F-NEXT: lw a1, 0(a1)
2287 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2288 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
2289 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2290 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2291 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
2292 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
2293 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
2294 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
2295 ; RV64ZVE32F-NEXT: ret
2296 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru)
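; All-zeroes mask: no element is loaded, so the result is just the passthru.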
2300 define <4 x i32> @mgather_falsemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
2301 ; RV32-LABEL: mgather_falsemask_v4i32:
2303 ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2304 ; RV32-NEXT: vmv1r.v v8, v9
2307 ; RV64V-LABEL: mgather_falsemask_v4i32:
2309 ; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2310 ; RV64V-NEXT: vmv1r.v v8, v10
2313 ; RV64ZVE32F-LABEL: mgather_falsemask_v4i32:
2314 ; RV64ZVE32F: # %bb.0:
2315 ; RV64ZVE32F-NEXT: ret
2316 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x i32> %passthru)
2320 declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
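; <8 x i32> gather, exercising LMUL=2 result registers on the vector targets.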
2322 define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthru) {
2323 ; RV32-LABEL: mgather_v8i32:
2325 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2326 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
2327 ; RV32-NEXT: vmv.v.v v8, v10
2330 ; RV64V-LABEL: mgather_v8i32:
2332 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2333 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
2334 ; RV64V-NEXT: vmv.v.v v8, v12
2337 ; RV64ZVE32F-LABEL: mgather_v8i32:
2338 ; RV64ZVE32F: # %bb.0:
2339 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2340 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2341 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2342 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_9
2343 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2344 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2345 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_10
2346 ; RV64ZVE32F-NEXT: .LBB34_2: # %else2
2347 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2348 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_11
2349 ; RV64ZVE32F-NEXT: .LBB34_3: # %else5
2350 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2351 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_12
2352 ; RV64ZVE32F-NEXT: .LBB34_4: # %else8
2353 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2354 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_13
2355 ; RV64ZVE32F-NEXT: .LBB34_5: # %else11
2356 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2357 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_14
2358 ; RV64ZVE32F-NEXT: .LBB34_6: # %else14
2359 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2360 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
2361 ; RV64ZVE32F-NEXT: .LBB34_7: # %else17
2362 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2363 ; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
2364 ; RV64ZVE32F-NEXT: .LBB34_8: # %else20
2365 ; RV64ZVE32F-NEXT: ret
2366 ; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
2367 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
2368 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2369 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2370 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2371 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2372 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
2373 ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1
2374 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
2375 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2376 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2377 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2378 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2379 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
2380 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2381 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_3
2382 ; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4
2383 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
2384 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2385 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2386 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2387 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
2388 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2389 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_4
2390 ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7
2391 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
2392 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2393 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2394 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2395 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
2396 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2397 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_5
2398 ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10
2399 ; RV64ZVE32F-NEXT: ld a2, 32(a0)
2400 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2401 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
2402 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2403 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
2404 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2405 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
2406 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13
2407 ; RV64ZVE32F-NEXT: ld a2, 40(a0)
2408 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2409 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2410 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2411 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
2412 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2413 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
2414 ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16
2415 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
2416 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2417 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2418 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2419 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
2420 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2421 ; RV64ZVE32F-NEXT: beqz a1, .LBB34_8
2422 ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19
2423 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
2424 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2425 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2426 ; RV64ZVE32F-NEXT: vmv.s.x v10, a0
2427 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
2428 ; RV64ZVE32F-NEXT: ret
2429 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
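; <8 x i32> gather with <8 x i8> indices off a common base pointer; the indices
; are sign-extended and shifted left by 2 to form byte offsets.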
2433 define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2434 ; RV32-LABEL: mgather_baseidx_v8i8_v8i32:
2436 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2437 ; RV32-NEXT: vsext.vf4 v12, v8
2438 ; RV32-NEXT: vsll.vi v8, v12, 2
2439 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
2440 ; RV32-NEXT: vmv.v.v v8, v10
2443 ; RV64V-LABEL: mgather_baseidx_v8i8_v8i32:
2445 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2446 ; RV64V-NEXT: vsext.vf8 v12, v8
2447 ; RV64V-NEXT: vsll.vi v12, v12, 2
2448 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2449 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
2450 ; RV64V-NEXT: vmv.v.v v8, v10
2453 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i32:
2454 ; RV64ZVE32F: # %bb.0:
2455 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2456 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2457 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2458 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_2
2459 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2460 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2461 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2462 ; RV64ZVE32F-NEXT: add a2, a0, a2
2463 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2464 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2465 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2466 ; RV64ZVE32F-NEXT: .LBB35_2: # %else
2467 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2468 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_4
2469 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2470 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2471 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
2472 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2473 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2474 ; RV64ZVE32F-NEXT: add a2, a0, a2
2475 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2476 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2477 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2478 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2479 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
2480 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2
2481 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2482 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
2483 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2484 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2485 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
2486 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
2487 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
2488 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2489 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
2490 ; RV64ZVE32F-NEXT: .LBB35_6: # %else8
2491 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2492 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_16
2493 ; RV64ZVE32F-NEXT: .LBB35_7: # %else11
2494 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2495 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
2496 ; RV64ZVE32F-NEXT: .LBB35_8: # %cond.load13
2497 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2498 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2499 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2500 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2501 ; RV64ZVE32F-NEXT: add a2, a0, a2
2502 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2503 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2504 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2505 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2506 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
2507 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14
2508 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2509 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2510 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
2511 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_11
2512 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
2513 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2514 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2515 ; RV64ZVE32F-NEXT: add a2, a0, a2
2516 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2517 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2518 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2519 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2520 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
2521 ; RV64ZVE32F-NEXT: .LBB35_11: # %else17
2522 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2523 ; RV64ZVE32F-NEXT: beqz a1, .LBB35_13
2524 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
2525 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2526 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2527 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2528 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2529 ; RV64ZVE32F-NEXT: add a0, a0, a1
2530 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2531 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2532 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2533 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2534 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
2535 ; RV64ZVE32F-NEXT: .LBB35_13: # %else20
2536 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2537 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2538 ; RV64ZVE32F-NEXT: ret
2539 ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load4
2540 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2541 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2542 ; RV64ZVE32F-NEXT: add a2, a0, a2
2543 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2544 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2545 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2546 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2547 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
2548 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2549 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
2550 ; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load7
2551 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2552 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2553 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2554 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2555 ; RV64ZVE32F-NEXT: add a2, a0, a2
2556 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2557 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2558 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2559 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2560 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
2561 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2562 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
2563 ; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load10
2564 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2565 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2566 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2567 ; RV64ZVE32F-NEXT: add a2, a0, a2
2568 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2569 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2570 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2571 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
2572 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
2573 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2574 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
2575 ; RV64ZVE32F-NEXT: j .LBB35_9
2576 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
2577 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
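; Same as above, but the <8 x i8> indices are explicitly sign-extended to i32
; before the GEP; codegen matches the direct i8-index case.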
2581 define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2582 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2584 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2585 ; RV32-NEXT: vsext.vf4 v12, v8
2586 ; RV32-NEXT: vsll.vi v8, v12, 2
2587 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
2588 ; RV32-NEXT: vmv.v.v v8, v10
2591 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2593 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2594 ; RV64V-NEXT: vsext.vf8 v12, v8
2595 ; RV64V-NEXT: vsll.vi v12, v12, 2
2596 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2597 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
2598 ; RV64V-NEXT: vmv.v.v v8, v10
2601 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2602 ; RV64ZVE32F: # %bb.0:
2603 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2604 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2605 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2606 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_2
2607 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2608 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2609 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2610 ; RV64ZVE32F-NEXT: add a2, a0, a2
2611 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2612 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2613 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2614 ; RV64ZVE32F-NEXT: .LBB36_2: # %else
2615 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2616 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_4
2617 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2618 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2619 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
2620 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2621 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2622 ; RV64ZVE32F-NEXT: add a2, a0, a2
2623 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2624 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2625 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2626 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2627 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
2628 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2
2629 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2630 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
2631 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2632 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2633 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
2634 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_14
2635 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
2636 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2637 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_15
2638 ; RV64ZVE32F-NEXT: .LBB36_6: # %else8
2639 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2640 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_16
2641 ; RV64ZVE32F-NEXT: .LBB36_7: # %else11
2642 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2643 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_9
2644 ; RV64ZVE32F-NEXT: .LBB36_8: # %cond.load13
2645 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2646 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2647 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2648 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2649 ; RV64ZVE32F-NEXT: add a2, a0, a2
2650 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2651 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2652 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2653 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2654 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
2655 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14
2656 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2657 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2658 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
2659 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_11
2660 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
2661 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2662 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2663 ; RV64ZVE32F-NEXT: add a2, a0, a2
2664 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2665 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2666 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2667 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2668 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
2669 ; RV64ZVE32F-NEXT: .LBB36_11: # %else17
2670 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2671 ; RV64ZVE32F-NEXT: beqz a1, .LBB36_13
2672 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
2673 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2674 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2675 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2676 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2677 ; RV64ZVE32F-NEXT: add a0, a0, a1
2678 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2679 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2680 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2681 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2682 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
2683 ; RV64ZVE32F-NEXT: .LBB36_13: # %else20
2684 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2685 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2686 ; RV64ZVE32F-NEXT: ret
2687 ; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load4
2688 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2689 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2690 ; RV64ZVE32F-NEXT: add a2, a0, a2
2691 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2692 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2693 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2694 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2695 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
2696 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2697 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_6
2698 ; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load7
2699 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2700 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2701 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2702 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2703 ; RV64ZVE32F-NEXT: add a2, a0, a2
2704 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2705 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2706 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2707 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2708 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
2709 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2710 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
2711 ; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load10
2712 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2713 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2714 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2715 ; RV64ZVE32F-NEXT: add a2, a0, a2
2716 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2717 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2718 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2719 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
2720 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
2721 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2722 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_8
2723 ; RV64ZVE32F-NEXT: j .LBB36_9
2724 %eidxs = sext <8 x i8> %idxs to <8 x i32>
2725 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2726 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
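; Same as above with zero-extended indices; both vector targets zero-extend
; only to e16 and index with vluxei16.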
2730 define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2731 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2733 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2734 ; RV32-NEXT: vzext.vf2 v9, v8
2735 ; RV32-NEXT: vsll.vi v8, v9, 2
2736 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2737 ; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t
2738 ; RV32-NEXT: vmv.v.v v8, v10
2741 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2743 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2744 ; RV64V-NEXT: vzext.vf2 v9, v8
2745 ; RV64V-NEXT: vsll.vi v8, v9, 2
2746 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2747 ; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t
2748 ; RV64V-NEXT: vmv.v.v v8, v10
2751 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2752 ; RV64ZVE32F: # %bb.0:
2753 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2754 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2755 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2756 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_2
2757 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2758 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2759 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2760 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2761 ; RV64ZVE32F-NEXT: add a2, a0, a2
2762 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2763 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2764 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2765 ; RV64ZVE32F-NEXT: .LBB37_2: # %else
2766 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2767 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_4
2768 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2769 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2770 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
2771 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2772 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2773 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2774 ; RV64ZVE32F-NEXT: add a2, a0, a2
2775 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2776 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2777 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2778 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2779 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
2780 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2
2781 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2782 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
2783 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2784 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2785 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
2786 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_14
2787 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
2788 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2789 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_15
2790 ; RV64ZVE32F-NEXT: .LBB37_6: # %else8
2791 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2792 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_16
2793 ; RV64ZVE32F-NEXT: .LBB37_7: # %else11
2794 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2795 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_9
2796 ; RV64ZVE32F-NEXT: .LBB37_8: # %cond.load13
2797 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2798 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2799 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2800 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2801 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2802 ; RV64ZVE32F-NEXT: add a2, a0, a2
2803 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2804 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2805 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2806 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2807 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
2808 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14
2809 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2810 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2811 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
2812 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_11
2813 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
2814 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2815 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2816 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2817 ; RV64ZVE32F-NEXT: add a2, a0, a2
2818 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2819 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2820 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2821 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2822 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
2823 ; RV64ZVE32F-NEXT: .LBB37_11: # %else17
2824 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2825 ; RV64ZVE32F-NEXT: beqz a1, .LBB37_13
2826 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
2827 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2828 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2829 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2830 ; RV64ZVE32F-NEXT: andi a1, a1, 255
2831 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2832 ; RV64ZVE32F-NEXT: add a0, a0, a1
2833 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2834 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2835 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2836 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2837 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
2838 ; RV64ZVE32F-NEXT: .LBB37_13: # %else20
2839 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2840 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2841 ; RV64ZVE32F-NEXT: ret
2842 ; RV64ZVE32F-NEXT: .LBB37_14: # %cond.load4
2843 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2844 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2845 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2846 ; RV64ZVE32F-NEXT: add a2, a0, a2
2847 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2848 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2849 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2850 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2851 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
2852 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2853 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_6
2854 ; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load7
2855 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2856 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2857 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2858 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2859 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2860 ; RV64ZVE32F-NEXT: add a2, a0, a2
2861 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2862 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2863 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2864 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2865 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
2866 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2867 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_7
2868 ; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load10
2869 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2870 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2871 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2872 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2873 ; RV64ZVE32F-NEXT: add a2, a0, a2
2874 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2875 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2876 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2877 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
2878 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
2879 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2880 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_8
2881 ; RV64ZVE32F-NEXT: j .LBB37_9
2882 %eidxs = zext <8 x i8> %idxs to <8 x i32>
2883 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2884 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
2885 ret <8 x i32> %v
2886 }
2888 define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2889 ; RV32-LABEL: mgather_baseidx_v8i16_v8i32:
2891 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2892 ; RV32-NEXT: vsext.vf2 v12, v8
2893 ; RV32-NEXT: vsll.vi v8, v12, 2
2894 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
2895 ; RV32-NEXT: vmv.v.v v8, v10
2898 ; RV64V-LABEL: mgather_baseidx_v8i16_v8i32:
2900 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2901 ; RV64V-NEXT: vsext.vf4 v12, v8
2902 ; RV64V-NEXT: vsll.vi v12, v12, 2
2903 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2904 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
2905 ; RV64V-NEXT: vmv.v.v v8, v10
2908 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i32:
2909 ; RV64ZVE32F: # %bb.0:
2910 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2911 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2912 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2913 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_2
2914 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2915 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2916 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2917 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2918 ; RV64ZVE32F-NEXT: add a2, a0, a2
2919 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2920 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
2921 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2922 ; RV64ZVE32F-NEXT: .LBB38_2: # %else
2923 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2924 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_4
2925 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2926 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2927 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
2928 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2929 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2930 ; RV64ZVE32F-NEXT: add a2, a0, a2
2931 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2932 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2933 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2934 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2935 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
2936 ; RV64ZVE32F-NEXT: .LBB38_4: # %else2
2937 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2938 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
2939 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2940 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2941 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
2942 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_14
2943 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
2944 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2945 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_15
2946 ; RV64ZVE32F-NEXT: .LBB38_6: # %else8
2947 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2948 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_16
2949 ; RV64ZVE32F-NEXT: .LBB38_7: # %else11
2950 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2951 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_9
2952 ; RV64ZVE32F-NEXT: .LBB38_8: # %cond.load13
2953 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2954 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2955 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2956 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2957 ; RV64ZVE32F-NEXT: add a2, a0, a2
2958 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2959 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2960 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2961 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2962 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
2963 ; RV64ZVE32F-NEXT: .LBB38_9: # %else14
2964 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2965 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2966 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
2967 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_11
2968 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
2969 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2970 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2971 ; RV64ZVE32F-NEXT: add a2, a0, a2
2972 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2973 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2974 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2975 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2976 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
2977 ; RV64ZVE32F-NEXT: .LBB38_11: # %else17
2978 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2979 ; RV64ZVE32F-NEXT: beqz a1, .LBB38_13
2980 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
2981 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2982 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2983 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2984 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2985 ; RV64ZVE32F-NEXT: add a0, a0, a1
2986 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2987 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2988 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2989 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2990 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
2991 ; RV64ZVE32F-NEXT: .LBB38_13: # %else20
2992 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2993 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2994 ; RV64ZVE32F-NEXT: ret
2995 ; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load4
2996 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2997 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2998 ; RV64ZVE32F-NEXT: add a2, a0, a2
2999 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3000 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3001 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3002 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
3003 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
3004 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3005 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_6
3006 ; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load7
3007 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3008 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3009 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3010 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3011 ; RV64ZVE32F-NEXT: add a2, a0, a2
3012 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3013 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3014 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3015 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3016 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
3017 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3018 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
3019 ; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load10
3020 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
3021 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3022 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3023 ; RV64ZVE32F-NEXT: add a2, a0, a2
3024 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3025 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3026 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3027 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
3028 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
3029 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3030 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_8
3031 ; RV64ZVE32F-NEXT: j .LBB38_9
3032 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
3033 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
3034 ret <8 x i32> %v
3035 }
3037 define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3038 ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3040 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
3041 ; RV32-NEXT: vsext.vf2 v12, v8
3042 ; RV32-NEXT: vsll.vi v8, v12, 2
3043 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
3044 ; RV32-NEXT: vmv.v.v v8, v10
3047 ; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3049 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3050 ; RV64V-NEXT: vsext.vf4 v12, v8
3051 ; RV64V-NEXT: vsll.vi v12, v12, 2
3052 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
3053 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
3054 ; RV64V-NEXT: vmv.v.v v8, v10
3057 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3058 ; RV64ZVE32F: # %bb.0:
3059 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3060 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
3061 ; RV64ZVE32F-NEXT: andi a2, a1, 1
3062 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_2
3063 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3064 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
3065 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3066 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3067 ; RV64ZVE32F-NEXT: add a2, a0, a2
3068 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3069 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
3070 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
3071 ; RV64ZVE32F-NEXT: .LBB39_2: # %else
3072 ; RV64ZVE32F-NEXT: andi a2, a1, 2
3073 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_4
3074 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3075 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3076 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3077 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3078 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3079 ; RV64ZVE32F-NEXT: add a2, a0, a2
3080 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3081 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3082 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
3083 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
3084 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
3085 ; RV64ZVE32F-NEXT: .LBB39_4: # %else2
3086 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
3087 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3088 ; RV64ZVE32F-NEXT: andi a2, a1, 4
3089 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3090 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3091 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_14
3092 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
3093 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3094 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_15
3095 ; RV64ZVE32F-NEXT: .LBB39_6: # %else8
3096 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3097 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_16
3098 ; RV64ZVE32F-NEXT: .LBB39_7: # %else11
3099 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3100 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_9
3101 ; RV64ZVE32F-NEXT: .LBB39_8: # %cond.load13
3102 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3103 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3104 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3105 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3106 ; RV64ZVE32F-NEXT: add a2, a0, a2
3107 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3108 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3109 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3110 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
3111 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
3112 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14
3113 ; RV64ZVE32F-NEXT: andi a2, a1, 64
3114 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3115 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3116 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_11
3117 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
3118 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3119 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3120 ; RV64ZVE32F-NEXT: add a2, a0, a2
3121 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3122 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3123 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3124 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
3125 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
3126 ; RV64ZVE32F-NEXT: .LBB39_11: # %else17
3127 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3128 ; RV64ZVE32F-NEXT: beqz a1, .LBB39_13
3129 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
3130 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3131 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3132 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
3133 ; RV64ZVE32F-NEXT: slli a1, a1, 2
3134 ; RV64ZVE32F-NEXT: add a0, a0, a1
3135 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
3136 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3137 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
3138 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3139 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
3140 ; RV64ZVE32F-NEXT: .LBB39_13: # %else20
3141 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3142 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3143 ; RV64ZVE32F-NEXT: ret
3144 ; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load4
3145 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3146 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3147 ; RV64ZVE32F-NEXT: add a2, a0, a2
3148 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3149 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3150 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3151 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
3152 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
3153 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3154 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_6
3155 ; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load7
3156 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3157 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3158 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3159 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3160 ; RV64ZVE32F-NEXT: add a2, a0, a2
3161 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3162 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3163 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3164 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3165 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
3166 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3167 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_7
3168 ; RV64ZVE32F-NEXT: .LBB39_16: # %cond.load10
3169 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
3170 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3171 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3172 ; RV64ZVE32F-NEXT: add a2, a0, a2
3173 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3174 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3175 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3176 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
3177 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
3178 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3179 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_8
3180 ; RV64ZVE32F-NEXT: j .LBB39_9
3181 %eidxs = sext <8 x i16> %idxs to <8 x i32>
3182 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
3183 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
3184 ret <8 x i32> %v
3185 }
3187 define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3188 ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3190 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
3191 ; RV32-NEXT: vzext.vf2 v12, v8
3192 ; RV32-NEXT: vsll.vi v8, v12, 2
3193 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
3194 ; RV32-NEXT: vmv.v.v v8, v10
3197 ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3199 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
3200 ; RV64V-NEXT: vzext.vf2 v12, v8
3201 ; RV64V-NEXT: vsll.vi v8, v12, 2
3202 ; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t
3203 ; RV64V-NEXT: vmv.v.v v8, v10
3206 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3207 ; RV64ZVE32F: # %bb.0:
3208 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3209 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
3210 ; RV64ZVE32F-NEXT: andi a2, a1, 1
3211 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_2
3212 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3213 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
3214 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3215 ; RV64ZVE32F-NEXT: slli a2, a2, 48
3216 ; RV64ZVE32F-NEXT: srli a2, a2, 46
3217 ; RV64ZVE32F-NEXT: add a2, a0, a2
3218 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3219 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
3220 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
3221 ; RV64ZVE32F-NEXT: .LBB40_2: # %else
3222 ; RV64ZVE32F-NEXT: andi a2, a1, 2
3223 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_4
3224 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3225 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3226 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3227 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3228 ; RV64ZVE32F-NEXT: slli a2, a2, 48
3229 ; RV64ZVE32F-NEXT: srli a2, a2, 46
3230 ; RV64ZVE32F-NEXT: add a2, a0, a2
3231 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3232 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3233 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
3234 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
3235 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
3236 ; RV64ZVE32F-NEXT: .LBB40_4: # %else2
3237 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
3238 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3239 ; RV64ZVE32F-NEXT: andi a2, a1, 4
3240 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3241 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3242 ; RV64ZVE32F-NEXT: bnez a2, .LBB40_14
3243 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
3244 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3245 ; RV64ZVE32F-NEXT: bnez a2, .LBB40_15
3246 ; RV64ZVE32F-NEXT: .LBB40_6: # %else8
3247 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3248 ; RV64ZVE32F-NEXT: bnez a2, .LBB40_16
3249 ; RV64ZVE32F-NEXT: .LBB40_7: # %else11
3250 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3251 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_9
3252 ; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13
3253 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3254 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3255 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3256 ; RV64ZVE32F-NEXT: slli a2, a2, 48
3257 ; RV64ZVE32F-NEXT: srli a2, a2, 46
3258 ; RV64ZVE32F-NEXT: add a2, a0, a2
3259 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3260 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3261 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3262 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
3263 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
3264 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14
3265 ; RV64ZVE32F-NEXT: andi a2, a1, 64
3266 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3267 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3268 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_11
3269 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
3270 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3271 ; RV64ZVE32F-NEXT: slli a2, a2, 48
3272 ; RV64ZVE32F-NEXT: srli a2, a2, 46
3273 ; RV64ZVE32F-NEXT: add a2, a0, a2
3274 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3275 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3276 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3277 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
3278 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
3279 ; RV64ZVE32F-NEXT: .LBB40_11: # %else17
3280 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3281 ; RV64ZVE32F-NEXT: beqz a1, .LBB40_13
3282 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
3283 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3284 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3285 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
3286 ; RV64ZVE32F-NEXT: slli a1, a1, 48
3287 ; RV64ZVE32F-NEXT: srli a1, a1, 46
3288 ; RV64ZVE32F-NEXT: add a0, a0, a1
3289 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
3290 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3291 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
3292 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3293 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
3294 ; RV64ZVE32F-NEXT: .LBB40_13: # %else20
3295 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3296 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3297 ; RV64ZVE32F-NEXT: ret
3298 ; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load4
3299 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3300 ; RV64ZVE32F-NEXT: slli a2, a2, 48
3301 ; RV64ZVE32F-NEXT: srli a2, a2, 46
3302 ; RV64ZVE32F-NEXT: add a2, a0, a2
3303 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3304 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3305 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3306 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
3307 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
3308 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3309 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_6
3310 ; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load7
3311 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3312 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3313 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3314 ; RV64ZVE32F-NEXT: slli a2, a2, 48
3315 ; RV64ZVE32F-NEXT: srli a2, a2, 46
3316 ; RV64ZVE32F-NEXT: add a2, a0, a2
3317 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3318 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3319 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3320 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3321 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
3322 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3323 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_7
3324 ; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load10
3325 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
3326 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3327 ; RV64ZVE32F-NEXT: slli a2, a2, 48
3328 ; RV64ZVE32F-NEXT: srli a2, a2, 46
3329 ; RV64ZVE32F-NEXT: add a2, a0, a2
3330 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3331 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3332 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3333 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
3334 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
3335 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3336 ; RV64ZVE32F-NEXT: bnez a2, .LBB40_8
3337 ; RV64ZVE32F-NEXT: j .LBB40_9
3338 %eidxs = zext <8 x i16> %idxs to <8 x i32>
3339 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
3340 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
3341 ret <8 x i32> %v
3342 }
3344 define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3345 ; RV32-LABEL: mgather_baseidx_v8i32:
3347 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
3348 ; RV32-NEXT: vsll.vi v8, v8, 2
3349 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
3350 ; RV32-NEXT: vmv.v.v v8, v10
3353 ; RV64V-LABEL: mgather_baseidx_v8i32:
3355 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3356 ; RV64V-NEXT: vsext.vf2 v12, v8
3357 ; RV64V-NEXT: vsll.vi v12, v12, 2
3358 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
3359 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
3360 ; RV64V-NEXT: vmv.v.v v8, v10
3363 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i32:
3364 ; RV64ZVE32F: # %bb.0:
3365 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3366 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
3367 ; RV64ZVE32F-NEXT: andi a2, a1, 1
3368 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_2
3369 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3370 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
3371 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3372 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3373 ; RV64ZVE32F-NEXT: add a2, a0, a2
3374 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3375 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
3376 ; RV64ZVE32F-NEXT: .LBB41_2: # %else
3377 ; RV64ZVE32F-NEXT: andi a2, a1, 2
3378 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_4
3379 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3380 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
3381 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
3382 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
3383 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3384 ; RV64ZVE32F-NEXT: add a2, a0, a2
3385 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3386 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3387 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
3388 ; RV64ZVE32F-NEXT: .LBB41_4: # %else2
3389 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
3390 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
3391 ; RV64ZVE32F-NEXT: andi a2, a1, 4
3392 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
3393 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3394 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_14
3395 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
3396 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3397 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_15
3398 ; RV64ZVE32F-NEXT: .LBB41_6: # %else8
3399 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3400 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_16
3401 ; RV64ZVE32F-NEXT: .LBB41_7: # %else11
3402 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3403 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_9
3404 ; RV64ZVE32F-NEXT: .LBB41_8: # %cond.load13
3405 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3406 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
3407 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3408 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3409 ; RV64ZVE32F-NEXT: add a2, a0, a2
3410 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3411 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3412 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
3413 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
3414 ; RV64ZVE32F-NEXT: .LBB41_9: # %else14
3415 ; RV64ZVE32F-NEXT: andi a2, a1, 64
3416 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
3417 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
3418 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_11
3419 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
3420 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3421 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3422 ; RV64ZVE32F-NEXT: add a2, a0, a2
3423 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3424 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3425 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
3426 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
3427 ; RV64ZVE32F-NEXT: .LBB41_11: # %else17
3428 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3429 ; RV64ZVE32F-NEXT: beqz a1, .LBB41_13
3430 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
3431 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3432 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3433 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
3434 ; RV64ZVE32F-NEXT: slli a1, a1, 2
3435 ; RV64ZVE32F-NEXT: add a0, a0, a1
3436 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
3437 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
3438 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3439 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
3440 ; RV64ZVE32F-NEXT: .LBB41_13: # %else20
3441 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3442 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3443 ; RV64ZVE32F-NEXT: ret
3444 ; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load4
3445 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3446 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3447 ; RV64ZVE32F-NEXT: add a2, a0, a2
3448 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3449 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
3450 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
3451 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
3452 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3453 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_6
3454 ; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load7
3455 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3456 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3457 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3458 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3459 ; RV64ZVE32F-NEXT: add a2, a0, a2
3460 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3461 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3462 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
3463 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3464 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_7
3465 ; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load10
3466 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
3467 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
3468 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3469 ; RV64ZVE32F-NEXT: add a2, a0, a2
3470 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3471 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3472 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
3473 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3474 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_8
3475 ; RV64ZVE32F-NEXT: j .LBB41_9
3476 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
3477 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
3478 ret <8 x i32> %v
3479 }
3481 declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
3483 define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthru) {
3484 ; RV32V-LABEL: mgather_v1i64:
3486 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
3487 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
3488 ; RV32V-NEXT: vmv.v.v v8, v9
3491 ; RV64V-LABEL: mgather_v1i64:
3493 ; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
3494 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
3495 ; RV64V-NEXT: vmv.v.v v8, v9
3498 ; RV32ZVE32F-LABEL: mgather_v1i64:
3499 ; RV32ZVE32F: # %bb.0:
3500 ; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
3501 ; RV32ZVE32F-NEXT: vfirst.m a2, v0
3502 ; RV32ZVE32F-NEXT: bnez a2, .LBB42_2
3503 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
3504 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3505 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
3506 ; RV32ZVE32F-NEXT: lw a0, 0(a1)
3507 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
3508 ; RV32ZVE32F-NEXT: .LBB42_2: # %else
3509 ; RV32ZVE32F-NEXT: ret
3511 ; RV64ZVE32F-LABEL: mgather_v1i64:
3512 ; RV64ZVE32F: # %bb.0:
3513 ; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
3514 ; RV64ZVE32F-NEXT: vfirst.m a2, v0
3515 ; RV64ZVE32F-NEXT: bnez a2, .LBB42_2
3516 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3517 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
3518 ; RV64ZVE32F-NEXT: .LBB42_2: # %else
3519 ; RV64ZVE32F-NEXT: mv a0, a1
3520 ; RV64ZVE32F-NEXT: ret
3521 %v = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x i64> %passthru)
3522 ret <1 x i64> %v
3523 }
3525 declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
3527 define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthru) {
3528 ; RV32V-LABEL: mgather_v2i64:
3530 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
3531 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
3532 ; RV32V-NEXT: vmv.v.v v8, v9
3535 ; RV64V-LABEL: mgather_v2i64:
3537 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
3538 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
3539 ; RV64V-NEXT: vmv.v.v v8, v9
3542 ; RV32ZVE32F-LABEL: mgather_v2i64:
3543 ; RV32ZVE32F: # %bb.0:
3544 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3545 ; RV32ZVE32F-NEXT: vmv.x.s a4, v0
3546 ; RV32ZVE32F-NEXT: andi a2, a4, 1
3547 ; RV32ZVE32F-NEXT: beqz a2, .LBB43_3
3548 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
3549 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
3550 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
3551 ; RV32ZVE32F-NEXT: lw a2, 0(a3)
3552 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
3553 ; RV32ZVE32F-NEXT: andi a4, a4, 2
3554 ; RV32ZVE32F-NEXT: bnez a4, .LBB43_4
3555 ; RV32ZVE32F-NEXT: .LBB43_2:
3556 ; RV32ZVE32F-NEXT: lw a4, 8(a1)
3557 ; RV32ZVE32F-NEXT: lw a1, 12(a1)
3558 ; RV32ZVE32F-NEXT: j .LBB43_5
3559 ; RV32ZVE32F-NEXT: .LBB43_3:
3560 ; RV32ZVE32F-NEXT: lw a2, 0(a1)
3561 ; RV32ZVE32F-NEXT: lw a3, 4(a1)
3562 ; RV32ZVE32F-NEXT: andi a4, a4, 2
3563 ; RV32ZVE32F-NEXT: beqz a4, .LBB43_2
3564 ; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1
3565 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3566 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3567 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
3568 ; RV32ZVE32F-NEXT: lw a4, 0(a1)
3569 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
3570 ; RV32ZVE32F-NEXT: .LBB43_5: # %else2
3571 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3572 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3573 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
3574 ; RV32ZVE32F-NEXT: sw a1, 12(a0)
3575 ; RV32ZVE32F-NEXT: ret
3577 ; RV64ZVE32F-LABEL: mgather_v2i64:
3578 ; RV64ZVE32F: # %bb.0:
3579 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3580 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
3581 ; RV64ZVE32F-NEXT: andi a5, a4, 1
3582 ; RV64ZVE32F-NEXT: beqz a5, .LBB43_2
3583 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3584 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
3585 ; RV64ZVE32F-NEXT: .LBB43_2: # %else
3586 ; RV64ZVE32F-NEXT: andi a4, a4, 2
3587 ; RV64ZVE32F-NEXT: beqz a4, .LBB43_4
3588 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3589 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
3590 ; RV64ZVE32F-NEXT: .LBB43_4: # %else2
3591 ; RV64ZVE32F-NEXT: mv a0, a2
3592 ; RV64ZVE32F-NEXT: mv a1, a3
3593 ; RV64ZVE32F-NEXT: ret
3594 %v = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x i64> %passthru)
3595 ret <2 x i64> %v
3596 }
3598 declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
3600 define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthru) {
3601 ; RV32V-LABEL: mgather_v4i64:
3603 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
3604 ; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t
3605 ; RV32V-NEXT: vmv.v.v v8, v10
3608 ; RV64V-LABEL: mgather_v4i64:
3610 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
3611 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
3612 ; RV64V-NEXT: vmv.v.v v8, v10
3615 ; RV32ZVE32F-LABEL: mgather_v4i64:
3616 ; RV32ZVE32F: # %bb.0:
3617 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3618 ; RV32ZVE32F-NEXT: vmv.x.s a6, v0
3619 ; RV32ZVE32F-NEXT: andi a2, a6, 1
3620 ; RV32ZVE32F-NEXT: beqz a2, .LBB44_5
3621 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
3622 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
3623 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
3624 ; RV32ZVE32F-NEXT: lw a2, 0(a3)
3625 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
3626 ; RV32ZVE32F-NEXT: andi a4, a6, 2
3627 ; RV32ZVE32F-NEXT: bnez a4, .LBB44_6
3628 ; RV32ZVE32F-NEXT: .LBB44_2:
3629 ; RV32ZVE32F-NEXT: lw a4, 8(a1)
3630 ; RV32ZVE32F-NEXT: lw a5, 12(a1)
3631 ; RV32ZVE32F-NEXT: andi a7, a6, 4
3632 ; RV32ZVE32F-NEXT: bnez a7, .LBB44_7
3633 ; RV32ZVE32F-NEXT: .LBB44_3:
3634 ; RV32ZVE32F-NEXT: lw a7, 16(a1)
3635 ; RV32ZVE32F-NEXT: lw t0, 20(a1)
3636 ; RV32ZVE32F-NEXT: andi a6, a6, 8
3637 ; RV32ZVE32F-NEXT: bnez a6, .LBB44_8
3638 ; RV32ZVE32F-NEXT: .LBB44_4:
3639 ; RV32ZVE32F-NEXT: lw a6, 24(a1)
3640 ; RV32ZVE32F-NEXT: lw a1, 28(a1)
3641 ; RV32ZVE32F-NEXT: j .LBB44_9
3642 ; RV32ZVE32F-NEXT: .LBB44_5:
3643 ; RV32ZVE32F-NEXT: lw a2, 0(a1)
3644 ; RV32ZVE32F-NEXT: lw a3, 4(a1)
3645 ; RV32ZVE32F-NEXT: andi a4, a6, 2
3646 ; RV32ZVE32F-NEXT: beqz a4, .LBB44_2
3647 ; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1
3648 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3649 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3650 ; RV32ZVE32F-NEXT: vmv.x.s a5, v9
3651 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
3652 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
3653 ; RV32ZVE32F-NEXT: andi a7, a6, 4
3654 ; RV32ZVE32F-NEXT: beqz a7, .LBB44_3
3655 ; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4
3656 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3657 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3658 ; RV32ZVE32F-NEXT: vmv.x.s t0, v9
3659 ; RV32ZVE32F-NEXT: lw a7, 0(t0)
3660 ; RV32ZVE32F-NEXT: lw t0, 4(t0)
3661 ; RV32ZVE32F-NEXT: andi a6, a6, 8
3662 ; RV32ZVE32F-NEXT: beqz a6, .LBB44_4
3663 ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7
3664 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3665 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3666 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
3667 ; RV32ZVE32F-NEXT: lw a6, 0(a1)
3668 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
3669 ; RV32ZVE32F-NEXT: .LBB44_9: # %else8
3670 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3671 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3672 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
3673 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
3674 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
3675 ; RV32ZVE32F-NEXT: sw t0, 20(a0)
3676 ; RV32ZVE32F-NEXT: sw a6, 24(a0)
3677 ; RV32ZVE32F-NEXT: sw a1, 28(a0)
3678 ; RV32ZVE32F-NEXT: ret
3680 ; RV64ZVE32F-LABEL: mgather_v4i64:
3681 ; RV64ZVE32F: # %bb.0:
3682 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3683 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
3684 ; RV64ZVE32F-NEXT: andi a3, a5, 1
3685 ; RV64ZVE32F-NEXT: beqz a3, .LBB44_5
3686 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3687 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
3688 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
3689 ; RV64ZVE32F-NEXT: andi a4, a5, 2
3690 ; RV64ZVE32F-NEXT: bnez a4, .LBB44_6
3691 ; RV64ZVE32F-NEXT: .LBB44_2:
3692 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
3693 ; RV64ZVE32F-NEXT: andi a6, a5, 4
3694 ; RV64ZVE32F-NEXT: bnez a6, .LBB44_7
3695 ; RV64ZVE32F-NEXT: .LBB44_3:
3696 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
3697 ; RV64ZVE32F-NEXT: andi a5, a5, 8
3698 ; RV64ZVE32F-NEXT: bnez a5, .LBB44_8
3699 ; RV64ZVE32F-NEXT: .LBB44_4:
3700 ; RV64ZVE32F-NEXT: ld a1, 24(a2)
3701 ; RV64ZVE32F-NEXT: j .LBB44_9
3702 ; RV64ZVE32F-NEXT: .LBB44_5:
3703 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
3704 ; RV64ZVE32F-NEXT: andi a4, a5, 2
3705 ; RV64ZVE32F-NEXT: beqz a4, .LBB44_2
3706 ; RV64ZVE32F-NEXT: .LBB44_6: # %cond.load1
3707 ; RV64ZVE32F-NEXT: ld a4, 8(a1)
3708 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
3709 ; RV64ZVE32F-NEXT: andi a6, a5, 4
3710 ; RV64ZVE32F-NEXT: beqz a6, .LBB44_3
3711 ; RV64ZVE32F-NEXT: .LBB44_7: # %cond.load4
3712 ; RV64ZVE32F-NEXT: ld a6, 16(a1)
3713 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
3714 ; RV64ZVE32F-NEXT: andi a5, a5, 8
3715 ; RV64ZVE32F-NEXT: beqz a5, .LBB44_4
3716 ; RV64ZVE32F-NEXT: .LBB44_8: # %cond.load7
3717 ; RV64ZVE32F-NEXT: ld a1, 24(a1)
3718 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3719 ; RV64ZVE32F-NEXT: .LBB44_9: # %else8
3720 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
3721 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
3722 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
3723 ; RV64ZVE32F-NEXT: sd a1, 24(a0)
3724 ; RV64ZVE32F-NEXT: ret
3725 %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x i64> %passthru)
3726 ret <4 x i64> %v
3727 }
3729 define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
3730 ; RV32V-LABEL: mgather_truemask_v4i64:
3732 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3733 ; RV32V-NEXT: vluxei32.v v10, (zero), v8
3734 ; RV32V-NEXT: vmv.v.v v8, v10
3737 ; RV64V-LABEL: mgather_truemask_v4i64:
3739 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3740 ; RV64V-NEXT: vluxei64.v v8, (zero), v8
3743 ; RV32ZVE32F-LABEL: mgather_truemask_v4i64:
3744 ; RV32ZVE32F: # %bb.0:
3745 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3746 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
3747 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3748 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
3749 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3750 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3751 ; RV32ZVE32F-NEXT: lw a3, 0(a1)
3752 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
3753 ; RV32ZVE32F-NEXT: vmv.x.s a4, v9
3754 ; RV32ZVE32F-NEXT: vmv.x.s a5, v8
3755 ; RV32ZVE32F-NEXT: lw a6, 0(a2)
3756 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
3757 ; RV32ZVE32F-NEXT: lw a7, 0(a4)
3758 ; RV32ZVE32F-NEXT: lw a4, 4(a4)
3759 ; RV32ZVE32F-NEXT: lw t0, 0(a5)
3760 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
3761 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
3762 ; RV32ZVE32F-NEXT: sw a4, 20(a0)
3763 ; RV32ZVE32F-NEXT: sw t0, 24(a0)
3764 ; RV32ZVE32F-NEXT: sw a5, 28(a0)
3765 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3766 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3767 ; RV32ZVE32F-NEXT: sw a6, 8(a0)
3768 ; RV32ZVE32F-NEXT: sw a2, 12(a0)
3769 ; RV32ZVE32F-NEXT: ret
3771 ; RV64ZVE32F-LABEL: mgather_truemask_v4i64:
3772 ; RV64ZVE32F: # %bb.0:
3773 ; RV64ZVE32F-NEXT: ld a2, 0(a1)
3774 ; RV64ZVE32F-NEXT: ld a3, 8(a1)
3775 ; RV64ZVE32F-NEXT: ld a4, 16(a1)
3776 ; RV64ZVE32F-NEXT: ld a1, 24(a1)
3777 ; RV64ZVE32F-NEXT: ld a2, 0(a2)
3778 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
3779 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
3780 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3781 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
3782 ; RV64ZVE32F-NEXT: sd a3, 8(a0)
3783 ; RV64ZVE32F-NEXT: sd a4, 16(a0)
3784 ; RV64ZVE32F-NEXT: sd a1, 24(a0)
3785 ; RV64ZVE32F-NEXT: ret
3786 %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x i64> %passthru)
3787 ret <4 x i64> %v
3788 }
3790 define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
3791 ; RV32V-LABEL: mgather_falsemask_v4i64:
3793 ; RV32V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3794 ; RV32V-NEXT: vmv2r.v v8, v10
3797 ; RV64V-LABEL: mgather_falsemask_v4i64:
3799 ; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3800 ; RV64V-NEXT: vmv2r.v v8, v10
3803 ; RV32ZVE32F-LABEL: mgather_falsemask_v4i64:
3804 ; RV32ZVE32F: # %bb.0:
3805 ; RV32ZVE32F-NEXT: lw a2, 0(a1)
3806 ; RV32ZVE32F-NEXT: lw a3, 4(a1)
3807 ; RV32ZVE32F-NEXT: lw a4, 8(a1)
3808 ; RV32ZVE32F-NEXT: lw a5, 12(a1)
3809 ; RV32ZVE32F-NEXT: lw a6, 16(a1)
3810 ; RV32ZVE32F-NEXT: lw a7, 20(a1)
3811 ; RV32ZVE32F-NEXT: lw t0, 24(a1)
3812 ; RV32ZVE32F-NEXT: lw a1, 28(a1)
3813 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
3814 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
3815 ; RV32ZVE32F-NEXT: sw t0, 24(a0)
3816 ; RV32ZVE32F-NEXT: sw a1, 28(a0)
3817 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3818 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3819 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
3820 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
3821 ; RV32ZVE32F-NEXT: ret
3823 ; RV64ZVE32F-LABEL: mgather_falsemask_v4i64:
3824 ; RV64ZVE32F: # %bb.0:
3825 ; RV64ZVE32F-NEXT: ld a1, 0(a2)
3826 ; RV64ZVE32F-NEXT: ld a3, 8(a2)
3827 ; RV64ZVE32F-NEXT: ld a4, 16(a2)
3828 ; RV64ZVE32F-NEXT: ld a2, 24(a2)
3829 ; RV64ZVE32F-NEXT: sd a1, 0(a0)
3830 ; RV64ZVE32F-NEXT: sd a3, 8(a0)
3831 ; RV64ZVE32F-NEXT: sd a4, 16(a0)
3832 ; RV64ZVE32F-NEXT: sd a2, 24(a0)
3833 ; RV64ZVE32F-NEXT: ret
3834 %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x i64> %passthru)
3835 ret <4 x i64> %v
3836 }
3838 declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
3840 define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthru) {
3841 ; RV32V-LABEL: mgather_v8i64:
3843 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
3844 ; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t
3845 ; RV32V-NEXT: vmv.v.v v8, v12
3848 ; RV64V-LABEL: mgather_v8i64:
3850 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
3851 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
3852 ; RV64V-NEXT: vmv.v.v v8, v12
3855 ; RV32ZVE32F-LABEL: mgather_v8i64:
3856 ; RV32ZVE32F: # %bb.0:
3857 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3858 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
3859 ; RV32ZVE32F-NEXT: andi a2, t0, 1
3860 ; RV32ZVE32F-NEXT: beqz a2, .LBB47_7
3861 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
3862 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
3863 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
3864 ; RV32ZVE32F-NEXT: lw a2, 0(a3)
3865 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
3866 ; RV32ZVE32F-NEXT: andi a4, t0, 2
3867 ; RV32ZVE32F-NEXT: bnez a4, .LBB47_8
3868 ; RV32ZVE32F-NEXT: .LBB47_2:
3869 ; RV32ZVE32F-NEXT: lw a4, 8(a1)
3870 ; RV32ZVE32F-NEXT: lw a5, 12(a1)
3871 ; RV32ZVE32F-NEXT: andi a6, t0, 4
3872 ; RV32ZVE32F-NEXT: bnez a6, .LBB47_9
3873 ; RV32ZVE32F-NEXT: .LBB47_3:
3874 ; RV32ZVE32F-NEXT: lw a6, 16(a1)
3875 ; RV32ZVE32F-NEXT: lw a7, 20(a1)
3876 ; RV32ZVE32F-NEXT: andi t1, t0, 8
3877 ; RV32ZVE32F-NEXT: bnez t1, .LBB47_10
3878 ; RV32ZVE32F-NEXT: .LBB47_4:
3879 ; RV32ZVE32F-NEXT: lw t1, 24(a1)
3880 ; RV32ZVE32F-NEXT: lw t2, 28(a1)
3881 ; RV32ZVE32F-NEXT: andi t3, t0, 16
3882 ; RV32ZVE32F-NEXT: bnez t3, .LBB47_11
3883 ; RV32ZVE32F-NEXT: .LBB47_5:
3884 ; RV32ZVE32F-NEXT: lw t3, 32(a1)
3885 ; RV32ZVE32F-NEXT: lw t4, 36(a1)
3886 ; RV32ZVE32F-NEXT: andi t5, t0, 32
3887 ; RV32ZVE32F-NEXT: bnez t5, .LBB47_12
3888 ; RV32ZVE32F-NEXT: .LBB47_6:
3889 ; RV32ZVE32F-NEXT: lw t5, 40(a1)
3890 ; RV32ZVE32F-NEXT: lw t6, 44(a1)
3891 ; RV32ZVE32F-NEXT: j .LBB47_13
3892 ; RV32ZVE32F-NEXT: .LBB47_7:
3893 ; RV32ZVE32F-NEXT: lw a2, 0(a1)
3894 ; RV32ZVE32F-NEXT: lw a3, 4(a1)
3895 ; RV32ZVE32F-NEXT: andi a4, t0, 2
3896 ; RV32ZVE32F-NEXT: beqz a4, .LBB47_2
3897 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1
3898 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3899 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3900 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
3901 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
3902 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
3903 ; RV32ZVE32F-NEXT: andi a6, t0, 4
3904 ; RV32ZVE32F-NEXT: beqz a6, .LBB47_3
3905 ; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4
3906 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3907 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3908 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
3909 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
3910 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
3911 ; RV32ZVE32F-NEXT: andi t1, t0, 8
3912 ; RV32ZVE32F-NEXT: beqz t1, .LBB47_4
3913 ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7
3914 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3915 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3916 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
3917 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
3918 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
3919 ; RV32ZVE32F-NEXT: andi t3, t0, 16
3920 ; RV32ZVE32F-NEXT: beqz t3, .LBB47_5
3921 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10
3922 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3923 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3924 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
3925 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
3926 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
3927 ; RV32ZVE32F-NEXT: andi t5, t0, 32
3928 ; RV32ZVE32F-NEXT: beqz t5, .LBB47_6
3929 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13
3930 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3931 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3932 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
3933 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
3934 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
3935 ; RV32ZVE32F-NEXT: .LBB47_13: # %else14
3936 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3937 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3938 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3939 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3940 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3941 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3942 ; RV32ZVE32F-NEXT: andi s0, t0, 64
3943 ; RV32ZVE32F-NEXT: beqz s0, .LBB47_16
3944 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
3945 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3946 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3947 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
3948 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
3949 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
3950 ; RV32ZVE32F-NEXT: andi t0, t0, -128
3951 ; RV32ZVE32F-NEXT: bnez t0, .LBB47_17
3952 ; RV32ZVE32F-NEXT: .LBB47_15:
3953 ; RV32ZVE32F-NEXT: lw t0, 56(a1)
3954 ; RV32ZVE32F-NEXT: lw a1, 60(a1)
3955 ; RV32ZVE32F-NEXT: j .LBB47_18
3956 ; RV32ZVE32F-NEXT: .LBB47_16:
3957 ; RV32ZVE32F-NEXT: lw s0, 48(a1)
3958 ; RV32ZVE32F-NEXT: lw s1, 52(a1)
3959 ; RV32ZVE32F-NEXT: andi t0, t0, -128
3960 ; RV32ZVE32F-NEXT: beqz t0, .LBB47_15
3961 ; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19
3962 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3963 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3964 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
3965 ; RV32ZVE32F-NEXT: lw t0, 0(a1)
3966 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
3967 ; RV32ZVE32F-NEXT: .LBB47_18: # %else20
3968 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3969 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3970 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
3971 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
3972 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
3973 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
3974 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
3975 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
3976 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
3977 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
3978 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
3979 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
3980 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
3981 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
3982 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
3983 ; RV32ZVE32F-NEXT: sw a1, 60(a0)
3984 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3985 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3986 ; RV32ZVE32F-NEXT: .cfi_restore s0
3987 ; RV32ZVE32F-NEXT: .cfi_restore s1
3988 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3989 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
3990 ; RV32ZVE32F-NEXT: ret
3992 ; RV64ZVE32F-LABEL: mgather_v8i64:
3993 ; RV64ZVE32F: # %bb.0:
3994 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3995 ; RV64ZVE32F-NEXT: vmv.x.s a6, v0
3996 ; RV64ZVE32F-NEXT: andi a3, a6, 1
3997 ; RV64ZVE32F-NEXT: beqz a3, .LBB47_9
3998 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3999 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
4000 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
4001 ; RV64ZVE32F-NEXT: andi a4, a6, 2
4002 ; RV64ZVE32F-NEXT: bnez a4, .LBB47_10
4003 ; RV64ZVE32F-NEXT: .LBB47_2:
4004 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
4005 ; RV64ZVE32F-NEXT: andi a5, a6, 4
4006 ; RV64ZVE32F-NEXT: bnez a5, .LBB47_11
4007 ; RV64ZVE32F-NEXT: .LBB47_3:
4008 ; RV64ZVE32F-NEXT: ld a5, 16(a2)
4009 ; RV64ZVE32F-NEXT: andi a7, a6, 8
4010 ; RV64ZVE32F-NEXT: bnez a7, .LBB47_12
4011 ; RV64ZVE32F-NEXT: .LBB47_4:
4012 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
4013 ; RV64ZVE32F-NEXT: andi t0, a6, 16
4014 ; RV64ZVE32F-NEXT: bnez t0, .LBB47_13
4015 ; RV64ZVE32F-NEXT: .LBB47_5:
4016 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
4017 ; RV64ZVE32F-NEXT: andi t1, a6, 32
4018 ; RV64ZVE32F-NEXT: bnez t1, .LBB47_14
4019 ; RV64ZVE32F-NEXT: .LBB47_6:
4020 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
4021 ; RV64ZVE32F-NEXT: andi t2, a6, 64
4022 ; RV64ZVE32F-NEXT: bnez t2, .LBB47_15
4023 ; RV64ZVE32F-NEXT: .LBB47_7:
4024 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
4025 ; RV64ZVE32F-NEXT: andi a6, a6, -128
4026 ; RV64ZVE32F-NEXT: bnez a6, .LBB47_16
4027 ; RV64ZVE32F-NEXT: .LBB47_8:
4028 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
4029 ; RV64ZVE32F-NEXT: j .LBB47_17
4030 ; RV64ZVE32F-NEXT: .LBB47_9:
4031 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
4032 ; RV64ZVE32F-NEXT: andi a4, a6, 2
4033 ; RV64ZVE32F-NEXT: beqz a4, .LBB47_2
4034 ; RV64ZVE32F-NEXT: .LBB47_10: # %cond.load1
4035 ; RV64ZVE32F-NEXT: ld a4, 8(a1)
4036 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
4037 ; RV64ZVE32F-NEXT: andi a5, a6, 4
4038 ; RV64ZVE32F-NEXT: beqz a5, .LBB47_3
4039 ; RV64ZVE32F-NEXT: .LBB47_11: # %cond.load4
4040 ; RV64ZVE32F-NEXT: ld a5, 16(a1)
4041 ; RV64ZVE32F-NEXT: ld a5, 0(a5)
4042 ; RV64ZVE32F-NEXT: andi a7, a6, 8
4043 ; RV64ZVE32F-NEXT: beqz a7, .LBB47_4
4044 ; RV64ZVE32F-NEXT: .LBB47_12: # %cond.load7
4045 ; RV64ZVE32F-NEXT: ld a7, 24(a1)
4046 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
4047 ; RV64ZVE32F-NEXT: andi t0, a6, 16
4048 ; RV64ZVE32F-NEXT: beqz t0, .LBB47_5
4049 ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.load10
4050 ; RV64ZVE32F-NEXT: ld t0, 32(a1)
4051 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
4052 ; RV64ZVE32F-NEXT: andi t1, a6, 32
4053 ; RV64ZVE32F-NEXT: beqz t1, .LBB47_6
4054 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.load13
4055 ; RV64ZVE32F-NEXT: ld t1, 40(a1)
4056 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
4057 ; RV64ZVE32F-NEXT: andi t2, a6, 64
4058 ; RV64ZVE32F-NEXT: beqz t2, .LBB47_7
4059 ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.load16
4060 ; RV64ZVE32F-NEXT: ld t2, 48(a1)
4061 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
4062 ; RV64ZVE32F-NEXT: andi a6, a6, -128
4063 ; RV64ZVE32F-NEXT: beqz a6, .LBB47_8
4064 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.load19
4065 ; RV64ZVE32F-NEXT: ld a1, 56(a1)
4066 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
4067 ; RV64ZVE32F-NEXT: .LBB47_17: # %else20
4068 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4069 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
4070 ; RV64ZVE32F-NEXT: sd a5, 16(a0)
4071 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
4072 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
4073 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
4074 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
4075 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
4076 ; RV64ZVE32F-NEXT: ret
4077 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4078 ret <8 x i64> %v
4079 }
4081 define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4082 ; RV32V-LABEL: mgather_baseidx_v8i8_v8i64:
4084 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4085 ; RV32V-NEXT: vsext.vf4 v10, v8
4086 ; RV32V-NEXT: vsll.vi v8, v10, 3
4087 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4088 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
4089 ; RV32V-NEXT: vmv.v.v v8, v12
4092 ; RV64V-LABEL: mgather_baseidx_v8i8_v8i64:
4094 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
4095 ; RV64V-NEXT: vsext.vf8 v16, v8
4096 ; RV64V-NEXT: vsll.vi v8, v16, 3
4097 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
4098 ; RV64V-NEXT: vmv.v.v v8, v12
4101 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
4102 ; RV32ZVE32F: # %bb.0:
4103 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4104 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
4105 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4106 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4107 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4108 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4109 ; RV32ZVE32F-NEXT: andi a3, t0, 1
4110 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4111 ; RV32ZVE32F-NEXT: beqz a3, .LBB48_7
4112 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
4113 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
4114 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
4115 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
4116 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4117 ; RV32ZVE32F-NEXT: bnez a4, .LBB48_8
4118 ; RV32ZVE32F-NEXT: .LBB48_2:
4119 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
4120 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
4121 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4122 ; RV32ZVE32F-NEXT: bnez a6, .LBB48_9
4123 ; RV32ZVE32F-NEXT: .LBB48_3:
4124 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
4125 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
4126 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4127 ; RV32ZVE32F-NEXT: bnez t1, .LBB48_10
4128 ; RV32ZVE32F-NEXT: .LBB48_4:
4129 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
4130 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
4131 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4132 ; RV32ZVE32F-NEXT: bnez t3, .LBB48_11
4133 ; RV32ZVE32F-NEXT: .LBB48_5:
4134 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
4135 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
4136 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4137 ; RV32ZVE32F-NEXT: bnez t5, .LBB48_12
4138 ; RV32ZVE32F-NEXT: .LBB48_6:
4139 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
4140 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
4141 ; RV32ZVE32F-NEXT: j .LBB48_13
4142 ; RV32ZVE32F-NEXT: .LBB48_7:
4143 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
4144 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
4145 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4146 ; RV32ZVE32F-NEXT: beqz a4, .LBB48_2
4147 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1
4148 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4149 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4150 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
4151 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
4152 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
4153 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4154 ; RV32ZVE32F-NEXT: beqz a6, .LBB48_3
4155 ; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4
4156 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4157 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4158 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
4159 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
4160 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
4161 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4162 ; RV32ZVE32F-NEXT: beqz t1, .LBB48_4
4163 ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7
4164 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4165 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4166 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
4167 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
4168 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
4169 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4170 ; RV32ZVE32F-NEXT: beqz t3, .LBB48_5
4171 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10
4172 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4173 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4174 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
4175 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
4176 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
4177 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4178 ; RV32ZVE32F-NEXT: beqz t5, .LBB48_6
4179 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13
4180 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4181 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4182 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
4183 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
4184 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
4185 ; RV32ZVE32F-NEXT: .LBB48_13: # %else14
4186 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4187 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4188 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4189 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4190 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4191 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4192 ; RV32ZVE32F-NEXT: andi s0, t0, 64
4193 ; RV32ZVE32F-NEXT: beqz s0, .LBB48_16
4194 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
4195 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4196 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4197 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
4198 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
4199 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
4200 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4201 ; RV32ZVE32F-NEXT: bnez t0, .LBB48_17
4202 ; RV32ZVE32F-NEXT: .LBB48_15:
4203 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
4204 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
4205 ; RV32ZVE32F-NEXT: j .LBB48_18
4206 ; RV32ZVE32F-NEXT: .LBB48_16:
4207 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
4208 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
4209 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4210 ; RV32ZVE32F-NEXT: beqz t0, .LBB48_15
4211 ; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19
4212 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4213 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4214 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
4215 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
4216 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
4217 ; RV32ZVE32F-NEXT: .LBB48_18: # %else20
4218 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
4219 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
4220 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
4221 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
4222 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
4223 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
4224 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
4225 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
4226 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
4227 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
4228 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
4229 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
4230 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
4231 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
4232 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
4233 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
4234 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4235 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4236 ; RV32ZVE32F-NEXT: .cfi_restore s0
4237 ; RV32ZVE32F-NEXT: .cfi_restore s1
4238 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4239 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
4240 ; RV32ZVE32F-NEXT: ret
4241 ;
4242 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
4243 ; RV64ZVE32F: # %bb.0:
4244 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4245 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4246 ; RV64ZVE32F-NEXT: andi a3, a5, 1
4247 ; RV64ZVE32F-NEXT: beqz a3, .LBB48_3
4248 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
4249 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
4250 ; RV64ZVE32F-NEXT: slli a3, a3, 3
4251 ; RV64ZVE32F-NEXT: add a3, a1, a3
4252 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
4253 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4254 ; RV64ZVE32F-NEXT: bnez a4, .LBB48_4
4255 ; RV64ZVE32F-NEXT: .LBB48_2:
4256 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
4257 ; RV64ZVE32F-NEXT: j .LBB48_5
4258 ; RV64ZVE32F-NEXT: .LBB48_3:
4259 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
4260 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4261 ; RV64ZVE32F-NEXT: beqz a4, .LBB48_2
4262 ; RV64ZVE32F-NEXT: .LBB48_4: # %cond.load1
4263 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4264 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4265 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
4266 ; RV64ZVE32F-NEXT: slli a4, a4, 3
4267 ; RV64ZVE32F-NEXT: add a4, a1, a4
4268 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
4269 ; RV64ZVE32F-NEXT: .LBB48_5: # %else2
4270 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4271 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4272 ; RV64ZVE32F-NEXT: andi a6, a5, 4
4273 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4274 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4275 ; RV64ZVE32F-NEXT: beqz a6, .LBB48_10
4276 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
4277 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
4278 ; RV64ZVE32F-NEXT: slli a6, a6, 3
4279 ; RV64ZVE32F-NEXT: add a6, a1, a6
4280 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
4281 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4282 ; RV64ZVE32F-NEXT: bnez a7, .LBB48_11
4283 ; RV64ZVE32F-NEXT: .LBB48_7:
4284 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
4285 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4286 ; RV64ZVE32F-NEXT: bnez t0, .LBB48_12
4287 ; RV64ZVE32F-NEXT: .LBB48_8:
4288 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
4289 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4290 ; RV64ZVE32F-NEXT: bnez t1, .LBB48_13
4291 ; RV64ZVE32F-NEXT: .LBB48_9:
4292 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
4293 ; RV64ZVE32F-NEXT: j .LBB48_14
4294 ; RV64ZVE32F-NEXT: .LBB48_10:
4295 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
4296 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4297 ; RV64ZVE32F-NEXT: beqz a7, .LBB48_7
4298 ; RV64ZVE32F-NEXT: .LBB48_11: # %cond.load7
4299 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4300 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
4301 ; RV64ZVE32F-NEXT: slli a7, a7, 3
4302 ; RV64ZVE32F-NEXT: add a7, a1, a7
4303 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
4304 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4305 ; RV64ZVE32F-NEXT: beqz t0, .LBB48_8
4306 ; RV64ZVE32F-NEXT: .LBB48_12: # %cond.load10
4307 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
4308 ; RV64ZVE32F-NEXT: slli t0, t0, 3
4309 ; RV64ZVE32F-NEXT: add t0, a1, t0
4310 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
4311 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4312 ; RV64ZVE32F-NEXT: beqz t1, .LBB48_9
4313 ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.load13
4314 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4315 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
4316 ; RV64ZVE32F-NEXT: slli t1, t1, 3
4317 ; RV64ZVE32F-NEXT: add t1, a1, t1
4318 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
4319 ; RV64ZVE32F-NEXT: .LBB48_14: # %else14
4320 ; RV64ZVE32F-NEXT: andi t2, a5, 64
4321 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4322 ; RV64ZVE32F-NEXT: beqz t2, .LBB48_17
4323 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
4324 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4325 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4326 ; RV64ZVE32F-NEXT: add t2, a1, t2
4327 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
4328 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4329 ; RV64ZVE32F-NEXT: bnez a5, .LBB48_18
4330 ; RV64ZVE32F-NEXT: .LBB48_16:
4331 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
4332 ; RV64ZVE32F-NEXT: j .LBB48_19
4333 ; RV64ZVE32F-NEXT: .LBB48_17:
4334 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
4335 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4336 ; RV64ZVE32F-NEXT: beqz a5, .LBB48_16
4337 ; RV64ZVE32F-NEXT: .LBB48_18: # %cond.load19
4338 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4339 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
4340 ; RV64ZVE32F-NEXT: slli a2, a2, 3
4341 ; RV64ZVE32F-NEXT: add a1, a1, a2
4342 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
4343 ; RV64ZVE32F-NEXT: .LBB48_19: # %else20
4344 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4345 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
4346 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
4347 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
4348 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
4349 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
4350 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
4351 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
4352 ; RV64ZVE32F-NEXT: ret
4353 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
4354 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4355 ret <8 x i64> %v
4356 }
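; Gather of <8 x i64> with <8 x i8> indices sign-extended to i64 before scaling by the i64 element size.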
4358 define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4359 ; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4360 ; RV32V: # %bb.0:
4361 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4362 ; RV32V-NEXT: vsext.vf4 v10, v8
4363 ; RV32V-NEXT: vsll.vi v8, v10, 3
4364 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4365 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
4366 ; RV32V-NEXT: vmv.v.v v8, v12
4367 ; RV32V-NEXT: ret
4368 ;
4369 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4370 ; RV64V: # %bb.0:
4371 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
4372 ; RV64V-NEXT: vsext.vf8 v16, v8
4373 ; RV64V-NEXT: vsll.vi v8, v16, 3
4374 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
4375 ; RV64V-NEXT: vmv.v.v v8, v12
4376 ; RV64V-NEXT: ret
4377 ;
4378 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4379 ; RV32ZVE32F: # %bb.0:
4380 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4381 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
4382 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4383 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4384 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4385 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4386 ; RV32ZVE32F-NEXT: andi a3, t0, 1
4387 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4388 ; RV32ZVE32F-NEXT: beqz a3, .LBB49_7
4389 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
4390 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
4391 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
4392 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
4393 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4394 ; RV32ZVE32F-NEXT: bnez a4, .LBB49_8
4395 ; RV32ZVE32F-NEXT: .LBB49_2:
4396 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
4397 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
4398 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4399 ; RV32ZVE32F-NEXT: bnez a6, .LBB49_9
4400 ; RV32ZVE32F-NEXT: .LBB49_3:
4401 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
4402 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
4403 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4404 ; RV32ZVE32F-NEXT: bnez t1, .LBB49_10
4405 ; RV32ZVE32F-NEXT: .LBB49_4:
4406 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
4407 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
4408 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4409 ; RV32ZVE32F-NEXT: bnez t3, .LBB49_11
4410 ; RV32ZVE32F-NEXT: .LBB49_5:
4411 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
4412 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
4413 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4414 ; RV32ZVE32F-NEXT: bnez t5, .LBB49_12
4415 ; RV32ZVE32F-NEXT: .LBB49_6:
4416 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
4417 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
4418 ; RV32ZVE32F-NEXT: j .LBB49_13
4419 ; RV32ZVE32F-NEXT: .LBB49_7:
4420 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
4421 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
4422 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4423 ; RV32ZVE32F-NEXT: beqz a4, .LBB49_2
4424 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1
4425 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4426 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4427 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
4428 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
4429 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
4430 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4431 ; RV32ZVE32F-NEXT: beqz a6, .LBB49_3
4432 ; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4
4433 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4434 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4435 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
4436 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
4437 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
4438 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4439 ; RV32ZVE32F-NEXT: beqz t1, .LBB49_4
4440 ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7
4441 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4442 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4443 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
4444 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
4445 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
4446 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4447 ; RV32ZVE32F-NEXT: beqz t3, .LBB49_5
4448 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10
4449 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4450 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4451 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
4452 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
4453 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
4454 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4455 ; RV32ZVE32F-NEXT: beqz t5, .LBB49_6
4456 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13
4457 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4458 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4459 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
4460 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
4461 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
4462 ; RV32ZVE32F-NEXT: .LBB49_13: # %else14
4463 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4464 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4465 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4466 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4467 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4468 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4469 ; RV32ZVE32F-NEXT: andi s0, t0, 64
4470 ; RV32ZVE32F-NEXT: beqz s0, .LBB49_16
4471 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
4472 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4473 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4474 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
4475 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
4476 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
4477 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4478 ; RV32ZVE32F-NEXT: bnez t0, .LBB49_17
4479 ; RV32ZVE32F-NEXT: .LBB49_15:
4480 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
4481 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
4482 ; RV32ZVE32F-NEXT: j .LBB49_18
4483 ; RV32ZVE32F-NEXT: .LBB49_16:
4484 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
4485 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
4486 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4487 ; RV32ZVE32F-NEXT: beqz t0, .LBB49_15
4488 ; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19
4489 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4490 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4491 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
4492 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
4493 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
4494 ; RV32ZVE32F-NEXT: .LBB49_18: # %else20
4495 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
4496 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
4497 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
4498 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
4499 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
4500 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
4501 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
4502 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
4503 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
4504 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
4505 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
4506 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
4507 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
4508 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
4509 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
4510 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
4511 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4512 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4513 ; RV32ZVE32F-NEXT: .cfi_restore s0
4514 ; RV32ZVE32F-NEXT: .cfi_restore s1
4515 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4516 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
4517 ; RV32ZVE32F-NEXT: ret
4518 ;
4519 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4520 ; RV64ZVE32F: # %bb.0:
4521 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4522 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4523 ; RV64ZVE32F-NEXT: andi a3, a5, 1
4524 ; RV64ZVE32F-NEXT: beqz a3, .LBB49_3
4525 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
4526 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
4527 ; RV64ZVE32F-NEXT: slli a3, a3, 3
4528 ; RV64ZVE32F-NEXT: add a3, a1, a3
4529 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
4530 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4531 ; RV64ZVE32F-NEXT: bnez a4, .LBB49_4
4532 ; RV64ZVE32F-NEXT: .LBB49_2:
4533 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
4534 ; RV64ZVE32F-NEXT: j .LBB49_5
4535 ; RV64ZVE32F-NEXT: .LBB49_3:
4536 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
4537 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4538 ; RV64ZVE32F-NEXT: beqz a4, .LBB49_2
4539 ; RV64ZVE32F-NEXT: .LBB49_4: # %cond.load1
4540 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4541 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4542 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
4543 ; RV64ZVE32F-NEXT: slli a4, a4, 3
4544 ; RV64ZVE32F-NEXT: add a4, a1, a4
4545 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
4546 ; RV64ZVE32F-NEXT: .LBB49_5: # %else2
4547 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4548 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4549 ; RV64ZVE32F-NEXT: andi a6, a5, 4
4550 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4551 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4552 ; RV64ZVE32F-NEXT: beqz a6, .LBB49_10
4553 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
4554 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
4555 ; RV64ZVE32F-NEXT: slli a6, a6, 3
4556 ; RV64ZVE32F-NEXT: add a6, a1, a6
4557 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
4558 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4559 ; RV64ZVE32F-NEXT: bnez a7, .LBB49_11
4560 ; RV64ZVE32F-NEXT: .LBB49_7:
4561 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
4562 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4563 ; RV64ZVE32F-NEXT: bnez t0, .LBB49_12
4564 ; RV64ZVE32F-NEXT: .LBB49_8:
4565 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
4566 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4567 ; RV64ZVE32F-NEXT: bnez t1, .LBB49_13
4568 ; RV64ZVE32F-NEXT: .LBB49_9:
4569 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
4570 ; RV64ZVE32F-NEXT: j .LBB49_14
4571 ; RV64ZVE32F-NEXT: .LBB49_10:
4572 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
4573 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4574 ; RV64ZVE32F-NEXT: beqz a7, .LBB49_7
4575 ; RV64ZVE32F-NEXT: .LBB49_11: # %cond.load7
4576 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4577 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
4578 ; RV64ZVE32F-NEXT: slli a7, a7, 3
4579 ; RV64ZVE32F-NEXT: add a7, a1, a7
4580 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
4581 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4582 ; RV64ZVE32F-NEXT: beqz t0, .LBB49_8
4583 ; RV64ZVE32F-NEXT: .LBB49_12: # %cond.load10
4584 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
4585 ; RV64ZVE32F-NEXT: slli t0, t0, 3
4586 ; RV64ZVE32F-NEXT: add t0, a1, t0
4587 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
4588 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4589 ; RV64ZVE32F-NEXT: beqz t1, .LBB49_9
4590 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load13
4591 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4592 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
4593 ; RV64ZVE32F-NEXT: slli t1, t1, 3
4594 ; RV64ZVE32F-NEXT: add t1, a1, t1
4595 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
4596 ; RV64ZVE32F-NEXT: .LBB49_14: # %else14
4597 ; RV64ZVE32F-NEXT: andi t2, a5, 64
4598 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4599 ; RV64ZVE32F-NEXT: beqz t2, .LBB49_17
4600 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
4601 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4602 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4603 ; RV64ZVE32F-NEXT: add t2, a1, t2
4604 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
4605 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4606 ; RV64ZVE32F-NEXT: bnez a5, .LBB49_18
4607 ; RV64ZVE32F-NEXT: .LBB49_16:
4608 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
4609 ; RV64ZVE32F-NEXT: j .LBB49_19
4610 ; RV64ZVE32F-NEXT: .LBB49_17:
4611 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
4612 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4613 ; RV64ZVE32F-NEXT: beqz a5, .LBB49_16
4614 ; RV64ZVE32F-NEXT: .LBB49_18: # %cond.load19
4615 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4616 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
4617 ; RV64ZVE32F-NEXT: slli a2, a2, 3
4618 ; RV64ZVE32F-NEXT: add a1, a1, a2
4619 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
4620 ; RV64ZVE32F-NEXT: .LBB49_19: # %else20
4621 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4622 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
4623 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
4624 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
4625 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
4626 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
4627 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
4628 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
4629 ; RV64ZVE32F-NEXT: ret
4630 %eidxs = sext <8 x i8> %idxs to <8 x i64>
4631 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4632 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4633 ret <8 x i64> %v
4634 }
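; Gather of <8 x i64> with <8 x i8> indices zero-extended to i64, so the scalar fallback masks each index with 255 before scaling.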
4636 define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4637 ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4638 ; RV32V: # %bb.0:
4639 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4640 ; RV32V-NEXT: vzext.vf2 v9, v8
4641 ; RV32V-NEXT: vsll.vi v8, v9, 3
4642 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4643 ; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t
4644 ; RV32V-NEXT: vmv.v.v v8, v12
4645 ; RV32V-NEXT: ret
4646 ;
4647 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4648 ; RV64V: # %bb.0:
4649 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4650 ; RV64V-NEXT: vzext.vf2 v9, v8
4651 ; RV64V-NEXT: vsll.vi v8, v9, 3
4652 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4653 ; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t
4654 ; RV64V-NEXT: vmv.v.v v8, v12
4655 ; RV64V-NEXT: ret
4656 ;
4657 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4658 ; RV32ZVE32F: # %bb.0:
4659 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4660 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
4661 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4662 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4663 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4664 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4665 ; RV32ZVE32F-NEXT: andi a3, t0, 1
4666 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4667 ; RV32ZVE32F-NEXT: beqz a3, .LBB50_7
4668 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
4669 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
4670 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
4671 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
4672 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4673 ; RV32ZVE32F-NEXT: bnez a4, .LBB50_8
4674 ; RV32ZVE32F-NEXT: .LBB50_2:
4675 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
4676 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
4677 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4678 ; RV32ZVE32F-NEXT: bnez a6, .LBB50_9
4679 ; RV32ZVE32F-NEXT: .LBB50_3:
4680 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
4681 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
4682 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4683 ; RV32ZVE32F-NEXT: bnez t1, .LBB50_10
4684 ; RV32ZVE32F-NEXT: .LBB50_4:
4685 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
4686 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
4687 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4688 ; RV32ZVE32F-NEXT: bnez t3, .LBB50_11
4689 ; RV32ZVE32F-NEXT: .LBB50_5:
4690 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
4691 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
4692 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4693 ; RV32ZVE32F-NEXT: bnez t5, .LBB50_12
4694 ; RV32ZVE32F-NEXT: .LBB50_6:
4695 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
4696 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
4697 ; RV32ZVE32F-NEXT: j .LBB50_13
4698 ; RV32ZVE32F-NEXT: .LBB50_7:
4699 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
4700 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
4701 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4702 ; RV32ZVE32F-NEXT: beqz a4, .LBB50_2
4703 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1
4704 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4705 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4706 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
4707 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
4708 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
4709 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4710 ; RV32ZVE32F-NEXT: beqz a6, .LBB50_3
4711 ; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4
4712 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4713 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4714 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
4715 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
4716 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
4717 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4718 ; RV32ZVE32F-NEXT: beqz t1, .LBB50_4
4719 ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7
4720 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4721 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4722 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
4723 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
4724 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
4725 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4726 ; RV32ZVE32F-NEXT: beqz t3, .LBB50_5
4727 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10
4728 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4729 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4730 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
4731 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
4732 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
4733 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4734 ; RV32ZVE32F-NEXT: beqz t5, .LBB50_6
4735 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13
4736 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4737 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4738 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
4739 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
4740 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
4741 ; RV32ZVE32F-NEXT: .LBB50_13: # %else14
4742 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4743 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4744 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4745 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4746 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4747 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4748 ; RV32ZVE32F-NEXT: andi s0, t0, 64
4749 ; RV32ZVE32F-NEXT: beqz s0, .LBB50_16
4750 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
4751 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4752 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4753 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
4754 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
4755 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
4756 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4757 ; RV32ZVE32F-NEXT: bnez t0, .LBB50_17
4758 ; RV32ZVE32F-NEXT: .LBB50_15:
4759 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
4760 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
4761 ; RV32ZVE32F-NEXT: j .LBB50_18
4762 ; RV32ZVE32F-NEXT: .LBB50_16:
4763 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
4764 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
4765 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4766 ; RV32ZVE32F-NEXT: beqz t0, .LBB50_15
4767 ; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19
4768 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4769 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4770 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
4771 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
4772 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
4773 ; RV32ZVE32F-NEXT: .LBB50_18: # %else20
4774 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
4775 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
4776 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
4777 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
4778 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
4779 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
4780 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
4781 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
4782 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
4783 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
4784 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
4785 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
4786 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
4787 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
4788 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
4789 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
4790 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4791 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4792 ; RV32ZVE32F-NEXT: .cfi_restore s0
4793 ; RV32ZVE32F-NEXT: .cfi_restore s1
4794 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4795 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
4796 ; RV32ZVE32F-NEXT: ret
4797 ;
4798 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4799 ; RV64ZVE32F: # %bb.0:
4800 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4801 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4802 ; RV64ZVE32F-NEXT: andi a3, a5, 1
4803 ; RV64ZVE32F-NEXT: beqz a3, .LBB50_3
4804 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
4805 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
4806 ; RV64ZVE32F-NEXT: andi a3, a3, 255
4807 ; RV64ZVE32F-NEXT: slli a3, a3, 3
4808 ; RV64ZVE32F-NEXT: add a3, a1, a3
4809 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
4810 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4811 ; RV64ZVE32F-NEXT: bnez a4, .LBB50_4
4812 ; RV64ZVE32F-NEXT: .LBB50_2:
4813 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
4814 ; RV64ZVE32F-NEXT: j .LBB50_5
4815 ; RV64ZVE32F-NEXT: .LBB50_3:
4816 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
4817 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4818 ; RV64ZVE32F-NEXT: beqz a4, .LBB50_2
4819 ; RV64ZVE32F-NEXT: .LBB50_4: # %cond.load1
4820 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4821 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4822 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
4823 ; RV64ZVE32F-NEXT: andi a4, a4, 255
4824 ; RV64ZVE32F-NEXT: slli a4, a4, 3
4825 ; RV64ZVE32F-NEXT: add a4, a1, a4
4826 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
4827 ; RV64ZVE32F-NEXT: .LBB50_5: # %else2
4828 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4829 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4830 ; RV64ZVE32F-NEXT: andi a6, a5, 4
4831 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4832 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4833 ; RV64ZVE32F-NEXT: beqz a6, .LBB50_10
4834 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
4835 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
4836 ; RV64ZVE32F-NEXT: andi a6, a6, 255
4837 ; RV64ZVE32F-NEXT: slli a6, a6, 3
4838 ; RV64ZVE32F-NEXT: add a6, a1, a6
4839 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
4840 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4841 ; RV64ZVE32F-NEXT: bnez a7, .LBB50_11
4842 ; RV64ZVE32F-NEXT: .LBB50_7:
4843 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
4844 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4845 ; RV64ZVE32F-NEXT: bnez t0, .LBB50_12
4846 ; RV64ZVE32F-NEXT: .LBB50_8:
4847 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
4848 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4849 ; RV64ZVE32F-NEXT: bnez t1, .LBB50_13
4850 ; RV64ZVE32F-NEXT: .LBB50_9:
4851 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
4852 ; RV64ZVE32F-NEXT: j .LBB50_14
4853 ; RV64ZVE32F-NEXT: .LBB50_10:
4854 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
4855 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4856 ; RV64ZVE32F-NEXT: beqz a7, .LBB50_7
4857 ; RV64ZVE32F-NEXT: .LBB50_11: # %cond.load7
4858 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4859 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
4860 ; RV64ZVE32F-NEXT: andi a7, a7, 255
4861 ; RV64ZVE32F-NEXT: slli a7, a7, 3
4862 ; RV64ZVE32F-NEXT: add a7, a1, a7
4863 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
4864 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4865 ; RV64ZVE32F-NEXT: beqz t0, .LBB50_8
4866 ; RV64ZVE32F-NEXT: .LBB50_12: # %cond.load10
4867 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
4868 ; RV64ZVE32F-NEXT: andi t0, t0, 255
4869 ; RV64ZVE32F-NEXT: slli t0, t0, 3
4870 ; RV64ZVE32F-NEXT: add t0, a1, t0
4871 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
4872 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4873 ; RV64ZVE32F-NEXT: beqz t1, .LBB50_9
4874 ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.load13
4875 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4876 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
4877 ; RV64ZVE32F-NEXT: andi t1, t1, 255
4878 ; RV64ZVE32F-NEXT: slli t1, t1, 3
4879 ; RV64ZVE32F-NEXT: add t1, a1, t1
4880 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
4881 ; RV64ZVE32F-NEXT: .LBB50_14: # %else14
4882 ; RV64ZVE32F-NEXT: andi t2, a5, 64
4883 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4884 ; RV64ZVE32F-NEXT: beqz t2, .LBB50_17
4885 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
4886 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4887 ; RV64ZVE32F-NEXT: andi t2, t2, 255
4888 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4889 ; RV64ZVE32F-NEXT: add t2, a1, t2
4890 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
4891 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4892 ; RV64ZVE32F-NEXT: bnez a5, .LBB50_18
4893 ; RV64ZVE32F-NEXT: .LBB50_16:
4894 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
4895 ; RV64ZVE32F-NEXT: j .LBB50_19
4896 ; RV64ZVE32F-NEXT: .LBB50_17:
4897 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
4898 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4899 ; RV64ZVE32F-NEXT: beqz a5, .LBB50_16
4900 ; RV64ZVE32F-NEXT: .LBB50_18: # %cond.load19
4901 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4902 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
4903 ; RV64ZVE32F-NEXT: andi a2, a2, 255
4904 ; RV64ZVE32F-NEXT: slli a2, a2, 3
4905 ; RV64ZVE32F-NEXT: add a1, a1, a2
4906 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
4907 ; RV64ZVE32F-NEXT: .LBB50_19: # %else20
4908 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4909 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
4910 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
4911 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
4912 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
4913 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
4914 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
4915 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
4916 ; RV64ZVE32F-NEXT: ret
4917 %eidxs = zext <8 x i8> %idxs to <8 x i64>
4918 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4919 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4920 ret <8 x i64> %v
4921 }
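; Gather of <8 x i64> addressed directly by <8 x i16> indices into the i64 base.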
4923 define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4924 ; RV32V-LABEL: mgather_baseidx_v8i16_v8i64:
4925 ; RV32V: # %bb.0:
4926 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4927 ; RV32V-NEXT: vsext.vf2 v10, v8
4928 ; RV32V-NEXT: vsll.vi v8, v10, 3
4929 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4930 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
4931 ; RV32V-NEXT: vmv.v.v v8, v12
4932 ; RV32V-NEXT: ret
4933 ;
4934 ; RV64V-LABEL: mgather_baseidx_v8i16_v8i64:
4935 ; RV64V: # %bb.0:
4936 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
4937 ; RV64V-NEXT: vsext.vf4 v16, v8
4938 ; RV64V-NEXT: vsll.vi v8, v16, 3
4939 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
4940 ; RV64V-NEXT: vmv.v.v v8, v12
4941 ; RV64V-NEXT: ret
4942 ;
4943 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
4944 ; RV32ZVE32F: # %bb.0:
4945 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4946 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
4947 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4948 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4949 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4950 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4951 ; RV32ZVE32F-NEXT: andi a3, t0, 1
4952 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4953 ; RV32ZVE32F-NEXT: beqz a3, .LBB51_7
4954 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
4955 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
4956 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
4957 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
4958 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4959 ; RV32ZVE32F-NEXT: bnez a4, .LBB51_8
4960 ; RV32ZVE32F-NEXT: .LBB51_2:
4961 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
4962 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
4963 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4964 ; RV32ZVE32F-NEXT: bnez a6, .LBB51_9
4965 ; RV32ZVE32F-NEXT: .LBB51_3:
4966 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
4967 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
4968 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4969 ; RV32ZVE32F-NEXT: bnez t1, .LBB51_10
4970 ; RV32ZVE32F-NEXT: .LBB51_4:
4971 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
4972 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
4973 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4974 ; RV32ZVE32F-NEXT: bnez t3, .LBB51_11
4975 ; RV32ZVE32F-NEXT: .LBB51_5:
4976 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
4977 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
4978 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4979 ; RV32ZVE32F-NEXT: bnez t5, .LBB51_12
4980 ; RV32ZVE32F-NEXT: .LBB51_6:
4981 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
4982 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
4983 ; RV32ZVE32F-NEXT: j .LBB51_13
4984 ; RV32ZVE32F-NEXT: .LBB51_7:
4985 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
4986 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
4987 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4988 ; RV32ZVE32F-NEXT: beqz a4, .LBB51_2
4989 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1
4990 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4991 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4992 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
4993 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
4994 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
4995 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4996 ; RV32ZVE32F-NEXT: beqz a6, .LBB51_3
4997 ; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4
4998 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4999 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5000 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
5001 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
5002 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
5003 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5004 ; RV32ZVE32F-NEXT: beqz t1, .LBB51_4
5005 ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7
5006 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5007 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5008 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
5009 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
5010 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
5011 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5012 ; RV32ZVE32F-NEXT: beqz t3, .LBB51_5
5013 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10
5014 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5015 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5016 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
5017 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
5018 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
5019 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5020 ; RV32ZVE32F-NEXT: beqz t5, .LBB51_6
5021 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13
5022 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5023 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5024 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
5025 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
5026 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
5027 ; RV32ZVE32F-NEXT: .LBB51_13: # %else14
5028 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5029 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5030 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5031 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5032 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5033 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5034 ; RV32ZVE32F-NEXT: andi s0, t0, 64
5035 ; RV32ZVE32F-NEXT: beqz s0, .LBB51_16
5036 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
5037 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5038 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5039 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
5040 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
5041 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
5042 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5043 ; RV32ZVE32F-NEXT: bnez t0, .LBB51_17
5044 ; RV32ZVE32F-NEXT: .LBB51_15:
5045 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
5046 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
5047 ; RV32ZVE32F-NEXT: j .LBB51_18
5048 ; RV32ZVE32F-NEXT: .LBB51_16:
5049 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
5050 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
5051 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5052 ; RV32ZVE32F-NEXT: beqz t0, .LBB51_15
5053 ; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19
5054 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5055 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5056 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
5057 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
5058 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
5059 ; RV32ZVE32F-NEXT: .LBB51_18: # %else20
5060 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
5061 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
5062 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
5063 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
5064 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
5065 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
5066 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
5067 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
5068 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
5069 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
5070 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
5071 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
5072 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
5073 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
5074 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
5075 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
5076 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5077 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5078 ; RV32ZVE32F-NEXT: .cfi_restore s0
5079 ; RV32ZVE32F-NEXT: .cfi_restore s1
5080 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5081 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
5082 ; RV32ZVE32F-NEXT: ret
5083 ;
5084 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
5085 ; RV64ZVE32F: # %bb.0:
5086 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5087 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
5088 ; RV64ZVE32F-NEXT: andi a3, a5, 1
5089 ; RV64ZVE32F-NEXT: beqz a3, .LBB51_3
5090 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
5091 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
5092 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
5093 ; RV64ZVE32F-NEXT: slli a3, a3, 3
5094 ; RV64ZVE32F-NEXT: add a3, a1, a3
5095 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
5096 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5097 ; RV64ZVE32F-NEXT: bnez a4, .LBB51_4
5098 ; RV64ZVE32F-NEXT: .LBB51_2:
5099 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
5100 ; RV64ZVE32F-NEXT: j .LBB51_5
5101 ; RV64ZVE32F-NEXT: .LBB51_3:
5102 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
5103 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5104 ; RV64ZVE32F-NEXT: beqz a4, .LBB51_2
5105 ; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1
5106 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5107 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
5108 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
5109 ; RV64ZVE32F-NEXT: slli a4, a4, 3
5110 ; RV64ZVE32F-NEXT: add a4, a1, a4
5111 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
5112 ; RV64ZVE32F-NEXT: .LBB51_5: # %else2
5113 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
5114 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
5115 ; RV64ZVE32F-NEXT: andi a6, a5, 4
5116 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
5117 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5118 ; RV64ZVE32F-NEXT: beqz a6, .LBB51_10
5119 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
5120 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
5121 ; RV64ZVE32F-NEXT: slli a6, a6, 3
5122 ; RV64ZVE32F-NEXT: add a6, a1, a6
5123 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
5124 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5125 ; RV64ZVE32F-NEXT: bnez a7, .LBB51_11
5126 ; RV64ZVE32F-NEXT: .LBB51_7:
5127 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
5128 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5129 ; RV64ZVE32F-NEXT: bnez t0, .LBB51_12
5130 ; RV64ZVE32F-NEXT: .LBB51_8:
5131 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
5132 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5133 ; RV64ZVE32F-NEXT: bnez t1, .LBB51_13
5134 ; RV64ZVE32F-NEXT: .LBB51_9:
5135 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
5136 ; RV64ZVE32F-NEXT: j .LBB51_14
5137 ; RV64ZVE32F-NEXT: .LBB51_10:
5138 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
5139 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5140 ; RV64ZVE32F-NEXT: beqz a7, .LBB51_7
5141 ; RV64ZVE32F-NEXT: .LBB51_11: # %cond.load7
5142 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5143 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
5144 ; RV64ZVE32F-NEXT: slli a7, a7, 3
5145 ; RV64ZVE32F-NEXT: add a7, a1, a7
5146 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
5147 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5148 ; RV64ZVE32F-NEXT: beqz t0, .LBB51_8
5149 ; RV64ZVE32F-NEXT: .LBB51_12: # %cond.load10
5150 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
5151 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5152 ; RV64ZVE32F-NEXT: add t0, a1, t0
5153 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
5154 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5155 ; RV64ZVE32F-NEXT: beqz t1, .LBB51_9
5156 ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load13
5157 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
5158 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
5159 ; RV64ZVE32F-NEXT: slli t1, t1, 3
5160 ; RV64ZVE32F-NEXT: add t1, a1, t1
5161 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
5162 ; RV64ZVE32F-NEXT: .LBB51_14: # %else14
5163 ; RV64ZVE32F-NEXT: andi t2, a5, 64
5164 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
5165 ; RV64ZVE32F-NEXT: beqz t2, .LBB51_17
5166 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
5167 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5168 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5169 ; RV64ZVE32F-NEXT: add t2, a1, t2
5170 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
5171 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5172 ; RV64ZVE32F-NEXT: bnez a5, .LBB51_18
5173 ; RV64ZVE32F-NEXT: .LBB51_16:
5174 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
5175 ; RV64ZVE32F-NEXT: j .LBB51_19
5176 ; RV64ZVE32F-NEXT: .LBB51_17:
5177 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
5178 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5179 ; RV64ZVE32F-NEXT: beqz a5, .LBB51_16
5180 ; RV64ZVE32F-NEXT: .LBB51_18: # %cond.load19
5181 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5182 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
5183 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5184 ; RV64ZVE32F-NEXT: add a1, a1, a2
5185 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
5186 ; RV64ZVE32F-NEXT: .LBB51_19: # %else20
5187 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5188 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
5189 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
5190 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
5191 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
5192 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
5193 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
5194 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
5195 ; RV64ZVE32F-NEXT: ret
5196 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
5197 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5198 ret <8 x i64> %v
5199 }
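; Gather of <8 x i64> with <8 x i16> indices sign-extended to i64 before scaling.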
5201 define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5202 ; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5203 ; RV32V: # %bb.0:
5204 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5205 ; RV32V-NEXT: vsext.vf2 v10, v8
5206 ; RV32V-NEXT: vsll.vi v8, v10, 3
5207 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5208 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5209 ; RV32V-NEXT: vmv.v.v v8, v12
5210 ; RV32V-NEXT: ret
5211 ;
5212 ; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5213 ; RV64V: # %bb.0:
5214 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
5215 ; RV64V-NEXT: vsext.vf4 v16, v8
5216 ; RV64V-NEXT: vsll.vi v8, v16, 3
5217 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
5218 ; RV64V-NEXT: vmv.v.v v8, v12
5219 ; RV64V-NEXT: ret
5220 ;
5221 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5222 ; RV32ZVE32F: # %bb.0:
5223 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5224 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
5225 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5226 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
5227 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5228 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
5229 ; RV32ZVE32F-NEXT: andi a3, t0, 1
5230 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5231 ; RV32ZVE32F-NEXT: beqz a3, .LBB52_7
5232 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
5233 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
5234 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
5235 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
5236 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5237 ; RV32ZVE32F-NEXT: bnez a4, .LBB52_8
5238 ; RV32ZVE32F-NEXT: .LBB52_2:
5239 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
5240 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
5241 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5242 ; RV32ZVE32F-NEXT: bnez a6, .LBB52_9
5243 ; RV32ZVE32F-NEXT: .LBB52_3:
5244 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
5245 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
5246 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5247 ; RV32ZVE32F-NEXT: bnez t1, .LBB52_10
5248 ; RV32ZVE32F-NEXT: .LBB52_4:
5249 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
5250 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
5251 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5252 ; RV32ZVE32F-NEXT: bnez t3, .LBB52_11
5253 ; RV32ZVE32F-NEXT: .LBB52_5:
5254 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
5255 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
5256 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5257 ; RV32ZVE32F-NEXT: bnez t5, .LBB52_12
5258 ; RV32ZVE32F-NEXT: .LBB52_6:
5259 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
5260 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
5261 ; RV32ZVE32F-NEXT: j .LBB52_13
5262 ; RV32ZVE32F-NEXT: .LBB52_7:
5263 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
5264 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
5265 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5266 ; RV32ZVE32F-NEXT: beqz a4, .LBB52_2
5267 ; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1
5268 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5269 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5270 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
5271 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
5272 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
5273 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5274 ; RV32ZVE32F-NEXT: beqz a6, .LBB52_3
5275 ; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4
5276 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5277 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5278 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
5279 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
5280 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
5281 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5282 ; RV32ZVE32F-NEXT: beqz t1, .LBB52_4
5283 ; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7
5284 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5285 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5286 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
5287 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
5288 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
5289 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5290 ; RV32ZVE32F-NEXT: beqz t3, .LBB52_5
5291 ; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10
5292 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5293 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5294 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
5295 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
5296 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
5297 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5298 ; RV32ZVE32F-NEXT: beqz t5, .LBB52_6
5299 ; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13
5300 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5301 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5302 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
5303 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
5304 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
5305 ; RV32ZVE32F-NEXT: .LBB52_13: # %else14
5306 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5307 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5308 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5309 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5310 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5311 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5312 ; RV32ZVE32F-NEXT: andi s0, t0, 64
5313 ; RV32ZVE32F-NEXT: beqz s0, .LBB52_16
5314 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
5315 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5316 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5317 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
5318 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
5319 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
5320 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5321 ; RV32ZVE32F-NEXT: bnez t0, .LBB52_17
5322 ; RV32ZVE32F-NEXT: .LBB52_15:
5323 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
5324 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
5325 ; RV32ZVE32F-NEXT: j .LBB52_18
5326 ; RV32ZVE32F-NEXT: .LBB52_16:
5327 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
5328 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
5329 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5330 ; RV32ZVE32F-NEXT: beqz t0, .LBB52_15
5331 ; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19
5332 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5333 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5334 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
5335 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
5336 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
5337 ; RV32ZVE32F-NEXT: .LBB52_18: # %else20
5338 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
5339 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
5340 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
5341 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
5342 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
5343 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
5344 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
5345 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
5346 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
5347 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
5348 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
5349 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
5350 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
5351 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
5352 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
5353 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
5354 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5355 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5356 ; RV32ZVE32F-NEXT: .cfi_restore s0
5357 ; RV32ZVE32F-NEXT: .cfi_restore s1
5358 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5359 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
5360 ; RV32ZVE32F-NEXT: ret
5361 ;
5362 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5363 ; RV64ZVE32F: # %bb.0:
5364 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5365 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
5366 ; RV64ZVE32F-NEXT: andi a3, a5, 1
5367 ; RV64ZVE32F-NEXT: beqz a3, .LBB52_3
5368 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
5369 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
5370 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
5371 ; RV64ZVE32F-NEXT: slli a3, a3, 3
5372 ; RV64ZVE32F-NEXT: add a3, a1, a3
5373 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
5374 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5375 ; RV64ZVE32F-NEXT: bnez a4, .LBB52_4
5376 ; RV64ZVE32F-NEXT: .LBB52_2:
5377 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
5378 ; RV64ZVE32F-NEXT: j .LBB52_5
5379 ; RV64ZVE32F-NEXT: .LBB52_3:
5380 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
5381 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5382 ; RV64ZVE32F-NEXT: beqz a4, .LBB52_2
5383 ; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1
5384 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5385 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
5386 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
5387 ; RV64ZVE32F-NEXT: slli a4, a4, 3
5388 ; RV64ZVE32F-NEXT: add a4, a1, a4
5389 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
5390 ; RV64ZVE32F-NEXT: .LBB52_5: # %else2
5391 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
5392 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
5393 ; RV64ZVE32F-NEXT: andi a6, a5, 4
5394 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
5395 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5396 ; RV64ZVE32F-NEXT: beqz a6, .LBB52_10
5397 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
5398 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
5399 ; RV64ZVE32F-NEXT: slli a6, a6, 3
5400 ; RV64ZVE32F-NEXT: add a6, a1, a6
5401 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
5402 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5403 ; RV64ZVE32F-NEXT: bnez a7, .LBB52_11
5404 ; RV64ZVE32F-NEXT: .LBB52_7:
5405 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
5406 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5407 ; RV64ZVE32F-NEXT: bnez t0, .LBB52_12
5408 ; RV64ZVE32F-NEXT: .LBB52_8:
5409 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
5410 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5411 ; RV64ZVE32F-NEXT: bnez t1, .LBB52_13
5412 ; RV64ZVE32F-NEXT: .LBB52_9:
5413 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
5414 ; RV64ZVE32F-NEXT: j .LBB52_14
5415 ; RV64ZVE32F-NEXT: .LBB52_10:
5416 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
5417 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5418 ; RV64ZVE32F-NEXT: beqz a7, .LBB52_7
5419 ; RV64ZVE32F-NEXT: .LBB52_11: # %cond.load7
5420 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5421 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
5422 ; RV64ZVE32F-NEXT: slli a7, a7, 3
5423 ; RV64ZVE32F-NEXT: add a7, a1, a7
5424 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
5425 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5426 ; RV64ZVE32F-NEXT: beqz t0, .LBB52_8
5427 ; RV64ZVE32F-NEXT: .LBB52_12: # %cond.load10
5428 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
5429 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5430 ; RV64ZVE32F-NEXT: add t0, a1, t0
5431 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
5432 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5433 ; RV64ZVE32F-NEXT: beqz t1, .LBB52_9
5434 ; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load13
5435 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
5436 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
5437 ; RV64ZVE32F-NEXT: slli t1, t1, 3
5438 ; RV64ZVE32F-NEXT: add t1, a1, t1
5439 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
5440 ; RV64ZVE32F-NEXT: .LBB52_14: # %else14
5441 ; RV64ZVE32F-NEXT: andi t2, a5, 64
5442 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
5443 ; RV64ZVE32F-NEXT: beqz t2, .LBB52_17
5444 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
5445 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5446 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5447 ; RV64ZVE32F-NEXT: add t2, a1, t2
5448 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
5449 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5450 ; RV64ZVE32F-NEXT: bnez a5, .LBB52_18
5451 ; RV64ZVE32F-NEXT: .LBB52_16:
5452 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
5453 ; RV64ZVE32F-NEXT: j .LBB52_19
5454 ; RV64ZVE32F-NEXT: .LBB52_17:
5455 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
5456 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5457 ; RV64ZVE32F-NEXT: beqz a5, .LBB52_16
5458 ; RV64ZVE32F-NEXT: .LBB52_18: # %cond.load19
5459 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5460 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
5461 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5462 ; RV64ZVE32F-NEXT: add a1, a1, a2
5463 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
5464 ; RV64ZVE32F-NEXT: .LBB52_19: # %else20
5465 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5466 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
5467 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
5468 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
5469 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
5470 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
5471 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
5472 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
5473 ; RV64ZVE32F-NEXT: ret
5474 %eidxs = sext <8 x i16> %idxs to <8 x i64>
5475 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5476 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5477 ret <8 x i64> %v
5478 }
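; Gather of <8 x i64> with <8 x i16> indices zero-extended to i64; the scalar fallback clears the upper index bits with a slli/srli pair.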
5480 define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5481 ; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5482 ; RV32V: # %bb.0:
5483 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5484 ; RV32V-NEXT: vzext.vf2 v10, v8
5485 ; RV32V-NEXT: vsll.vi v8, v10, 3
5486 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5487 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5488 ; RV32V-NEXT: vmv.v.v v8, v12
5489 ; RV32V-NEXT: ret
5490 ;
5491 ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5492 ; RV64V: # %bb.0:
5493 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5494 ; RV64V-NEXT: vzext.vf2 v10, v8
5495 ; RV64V-NEXT: vsll.vi v8, v10, 3
5496 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5497 ; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5498 ; RV64V-NEXT: vmv.v.v v8, v12
5499 ; RV64V-NEXT: ret
5500 ;
5501 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5502 ; RV32ZVE32F: # %bb.0:
5503 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5504 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
5505 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5506 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
5507 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5508 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
5509 ; RV32ZVE32F-NEXT: andi a3, t0, 1
5510 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5511 ; RV32ZVE32F-NEXT: beqz a3, .LBB53_7
5512 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
5513 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
5514 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
5515 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
5516 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5517 ; RV32ZVE32F-NEXT: bnez a4, .LBB53_8
5518 ; RV32ZVE32F-NEXT: .LBB53_2:
5519 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
5520 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
5521 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5522 ; RV32ZVE32F-NEXT: bnez a6, .LBB53_9
5523 ; RV32ZVE32F-NEXT: .LBB53_3:
5524 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
5525 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
5526 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5527 ; RV32ZVE32F-NEXT: bnez t1, .LBB53_10
5528 ; RV32ZVE32F-NEXT: .LBB53_4:
5529 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
5530 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
5531 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5532 ; RV32ZVE32F-NEXT: bnez t3, .LBB53_11
5533 ; RV32ZVE32F-NEXT: .LBB53_5:
5534 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
5535 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
5536 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5537 ; RV32ZVE32F-NEXT: bnez t5, .LBB53_12
5538 ; RV32ZVE32F-NEXT: .LBB53_6:
5539 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
5540 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
5541 ; RV32ZVE32F-NEXT: j .LBB53_13
5542 ; RV32ZVE32F-NEXT: .LBB53_7:
5543 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
5544 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
5545 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5546 ; RV32ZVE32F-NEXT: beqz a4, .LBB53_2
5547 ; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1
5548 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5549 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5550 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
5551 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
5552 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
5553 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5554 ; RV32ZVE32F-NEXT: beqz a6, .LBB53_3
5555 ; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4
5556 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5557 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5558 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
5559 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
5560 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
5561 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5562 ; RV32ZVE32F-NEXT: beqz t1, .LBB53_4
5563 ; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7
5564 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5565 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5566 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
5567 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
5568 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
5569 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5570 ; RV32ZVE32F-NEXT: beqz t3, .LBB53_5
5571 ; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10
5572 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5573 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5574 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
5575 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
5576 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
5577 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5578 ; RV32ZVE32F-NEXT: beqz t5, .LBB53_6
5579 ; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13
5580 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5581 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5582 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
5583 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
5584 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
5585 ; RV32ZVE32F-NEXT: .LBB53_13: # %else14
5586 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5587 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5588 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5589 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5590 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5591 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5592 ; RV32ZVE32F-NEXT: andi s0, t0, 64
5593 ; RV32ZVE32F-NEXT: beqz s0, .LBB53_16
5594 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
5595 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5596 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5597 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
5598 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
5599 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
5600 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5601 ; RV32ZVE32F-NEXT: bnez t0, .LBB53_17
5602 ; RV32ZVE32F-NEXT: .LBB53_15:
5603 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
5604 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
5605 ; RV32ZVE32F-NEXT: j .LBB53_18
5606 ; RV32ZVE32F-NEXT: .LBB53_16:
5607 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
5608 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
5609 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5610 ; RV32ZVE32F-NEXT: beqz t0, .LBB53_15
5611 ; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19
5612 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5613 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5614 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
5615 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
5616 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
5617 ; RV32ZVE32F-NEXT: .LBB53_18: # %else20
5618 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
5619 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
5620 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
5621 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
5622 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
5623 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
5624 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
5625 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
5626 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
5627 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
5628 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
5629 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
5630 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
5631 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
5632 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
5633 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
5634 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5635 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5636 ; RV32ZVE32F-NEXT: .cfi_restore s0
5637 ; RV32ZVE32F-NEXT: .cfi_restore s1
5638 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5639 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
5640 ; RV32ZVE32F-NEXT: ret
5642 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5643 ; RV64ZVE32F: # %bb.0:
5644 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5645 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
5646 ; RV64ZVE32F-NEXT: andi a3, a5, 1
5647 ; RV64ZVE32F-NEXT: beqz a3, .LBB53_3
5648 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
5649 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
5650 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
5651 ; RV64ZVE32F-NEXT: slli a3, a3, 48
5652 ; RV64ZVE32F-NEXT: srli a3, a3, 45
5653 ; RV64ZVE32F-NEXT: add a3, a1, a3
5654 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
5655 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5656 ; RV64ZVE32F-NEXT: bnez a4, .LBB53_4
5657 ; RV64ZVE32F-NEXT: .LBB53_2:
5658 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
5659 ; RV64ZVE32F-NEXT: j .LBB53_5
5660 ; RV64ZVE32F-NEXT: .LBB53_3:
5661 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
5662 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5663 ; RV64ZVE32F-NEXT: beqz a4, .LBB53_2
5664 ; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1
5665 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5666 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
5667 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
5668 ; RV64ZVE32F-NEXT: slli a4, a4, 48
5669 ; RV64ZVE32F-NEXT: srli a4, a4, 45
5670 ; RV64ZVE32F-NEXT: add a4, a1, a4
5671 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
5672 ; RV64ZVE32F-NEXT: .LBB53_5: # %else2
5673 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
5674 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
5675 ; RV64ZVE32F-NEXT: andi a6, a5, 4
5676 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
5677 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5678 ; RV64ZVE32F-NEXT: beqz a6, .LBB53_10
5679 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
5680 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
5681 ; RV64ZVE32F-NEXT: slli a6, a6, 48
5682 ; RV64ZVE32F-NEXT: srli a6, a6, 45
5683 ; RV64ZVE32F-NEXT: add a6, a1, a6
5684 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
5685 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5686 ; RV64ZVE32F-NEXT: bnez a7, .LBB53_11
5687 ; RV64ZVE32F-NEXT: .LBB53_7:
5688 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
5689 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5690 ; RV64ZVE32F-NEXT: bnez t0, .LBB53_12
5691 ; RV64ZVE32F-NEXT: .LBB53_8:
5692 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
5693 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5694 ; RV64ZVE32F-NEXT: bnez t1, .LBB53_13
5695 ; RV64ZVE32F-NEXT: .LBB53_9:
5696 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
5697 ; RV64ZVE32F-NEXT: j .LBB53_14
5698 ; RV64ZVE32F-NEXT: .LBB53_10:
5699 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
5700 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5701 ; RV64ZVE32F-NEXT: beqz a7, .LBB53_7
5702 ; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7
5703 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5704 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
5705 ; RV64ZVE32F-NEXT: slli a7, a7, 48
5706 ; RV64ZVE32F-NEXT: srli a7, a7, 45
5707 ; RV64ZVE32F-NEXT: add a7, a1, a7
5708 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
5709 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5710 ; RV64ZVE32F-NEXT: beqz t0, .LBB53_8
5711 ; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10
5712 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
5713 ; RV64ZVE32F-NEXT: slli t0, t0, 48
5714 ; RV64ZVE32F-NEXT: srli t0, t0, 45
5715 ; RV64ZVE32F-NEXT: add t0, a1, t0
5716 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
5717 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5718 ; RV64ZVE32F-NEXT: beqz t1, .LBB53_9
5719 ; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load13
5720 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
5721 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
5722 ; RV64ZVE32F-NEXT: slli t1, t1, 48
5723 ; RV64ZVE32F-NEXT: srli t1, t1, 45
5724 ; RV64ZVE32F-NEXT: add t1, a1, t1
5725 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
5726 ; RV64ZVE32F-NEXT: .LBB53_14: # %else14
5727 ; RV64ZVE32F-NEXT: andi t2, a5, 64
5728 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
5729 ; RV64ZVE32F-NEXT: beqz t2, .LBB53_17
5730 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
5731 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5732 ; RV64ZVE32F-NEXT: slli t2, t2, 48
5733 ; RV64ZVE32F-NEXT: srli t2, t2, 45
5734 ; RV64ZVE32F-NEXT: add t2, a1, t2
5735 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
5736 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5737 ; RV64ZVE32F-NEXT: bnez a5, .LBB53_18
5738 ; RV64ZVE32F-NEXT: .LBB53_16:
5739 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
5740 ; RV64ZVE32F-NEXT: j .LBB53_19
5741 ; RV64ZVE32F-NEXT: .LBB53_17:
5742 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
5743 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5744 ; RV64ZVE32F-NEXT: beqz a5, .LBB53_16
5745 ; RV64ZVE32F-NEXT: .LBB53_18: # %cond.load19
5746 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5747 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
5748 ; RV64ZVE32F-NEXT: slli a2, a2, 48
5749 ; RV64ZVE32F-NEXT: srli a2, a2, 45
5750 ; RV64ZVE32F-NEXT: add a1, a1, a2
5751 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
5752 ; RV64ZVE32F-NEXT: .LBB53_19: # %else20
5753 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5754 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
5755 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
5756 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
5757 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
5758 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
5759 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
5760 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
5761 ; RV64ZVE32F-NEXT: ret
5762 %eidxs = zext <8 x i16> %idxs to <8 x i64>
5763 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5764 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5765 ret <8 x i64> %v
5766 }
5768 define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5769 ; RV32V-LABEL: mgather_baseidx_v8i32_v8i64:
5771 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5772 ; RV32V-NEXT: vsll.vi v8, v8, 3
5773 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5774 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5775 ; RV32V-NEXT: vmv.v.v v8, v12
5778 ; RV64V-LABEL: mgather_baseidx_v8i32_v8i64:
5780 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
5781 ; RV64V-NEXT: vsext.vf2 v16, v8
5782 ; RV64V-NEXT: vsll.vi v8, v16, 3
5783 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
5784 ; RV64V-NEXT: vmv.v.v v8, v12
5787 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
5788 ; RV32ZVE32F: # %bb.0:
5789 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5790 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5791 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5792 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
5793 ; RV32ZVE32F-NEXT: andi a3, t0, 1
5794 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5795 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5796 ; RV32ZVE32F-NEXT: beqz a3, .LBB54_7
5797 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
5798 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
5799 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
5800 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
5801 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5802 ; RV32ZVE32F-NEXT: bnez a4, .LBB54_8
5803 ; RV32ZVE32F-NEXT: .LBB54_2:
5804 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
5805 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
5806 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5807 ; RV32ZVE32F-NEXT: bnez a6, .LBB54_9
5808 ; RV32ZVE32F-NEXT: .LBB54_3:
5809 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
5810 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
5811 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5812 ; RV32ZVE32F-NEXT: bnez t1, .LBB54_10
5813 ; RV32ZVE32F-NEXT: .LBB54_4:
5814 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
5815 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
5816 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5817 ; RV32ZVE32F-NEXT: bnez t3, .LBB54_11
5818 ; RV32ZVE32F-NEXT: .LBB54_5:
5819 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
5820 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
5821 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5822 ; RV32ZVE32F-NEXT: bnez t5, .LBB54_12
5823 ; RV32ZVE32F-NEXT: .LBB54_6:
5824 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
5825 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
5826 ; RV32ZVE32F-NEXT: j .LBB54_13
5827 ; RV32ZVE32F-NEXT: .LBB54_7:
5828 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
5829 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
5830 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5831 ; RV32ZVE32F-NEXT: beqz a4, .LBB54_2
5832 ; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1
5833 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5834 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5835 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
5836 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
5837 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
5838 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5839 ; RV32ZVE32F-NEXT: beqz a6, .LBB54_3
5840 ; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4
5841 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5842 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5843 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
5844 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
5845 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
5846 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5847 ; RV32ZVE32F-NEXT: beqz t1, .LBB54_4
5848 ; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7
5849 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5850 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5851 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
5852 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
5853 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
5854 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5855 ; RV32ZVE32F-NEXT: beqz t3, .LBB54_5
5856 ; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10
5857 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5858 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5859 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
5860 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
5861 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
5862 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5863 ; RV32ZVE32F-NEXT: beqz t5, .LBB54_6
5864 ; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13
5865 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5866 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5867 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
5868 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
5869 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
5870 ; RV32ZVE32F-NEXT: .LBB54_13: # %else14
5871 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5872 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5873 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5874 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5875 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5876 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5877 ; RV32ZVE32F-NEXT: andi s0, t0, 64
5878 ; RV32ZVE32F-NEXT: beqz s0, .LBB54_16
5879 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
5880 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5881 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5882 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
5883 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
5884 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
5885 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5886 ; RV32ZVE32F-NEXT: bnez t0, .LBB54_17
5887 ; RV32ZVE32F-NEXT: .LBB54_15:
5888 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
5889 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
5890 ; RV32ZVE32F-NEXT: j .LBB54_18
5891 ; RV32ZVE32F-NEXT: .LBB54_16:
5892 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
5893 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
5894 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5895 ; RV32ZVE32F-NEXT: beqz t0, .LBB54_15
5896 ; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19
5897 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5898 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5899 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
5900 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
5901 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
5902 ; RV32ZVE32F-NEXT: .LBB54_18: # %else20
5903 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
5904 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
5905 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
5906 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
5907 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
5908 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
5909 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
5910 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
5911 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
5912 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
5913 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
5914 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
5915 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
5916 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
5917 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
5918 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
5919 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5920 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5921 ; RV32ZVE32F-NEXT: .cfi_restore s0
5922 ; RV32ZVE32F-NEXT: .cfi_restore s1
5923 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5924 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
5925 ; RV32ZVE32F-NEXT: ret
5927 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
5928 ; RV64ZVE32F: # %bb.0:
5929 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5930 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
5931 ; RV64ZVE32F-NEXT: andi a3, a5, 1
5932 ; RV64ZVE32F-NEXT: beqz a3, .LBB54_3
5933 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
5934 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
5935 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
5936 ; RV64ZVE32F-NEXT: slli a3, a3, 3
5937 ; RV64ZVE32F-NEXT: add a3, a1, a3
5938 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
5939 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5940 ; RV64ZVE32F-NEXT: bnez a4, .LBB54_4
5941 ; RV64ZVE32F-NEXT: .LBB54_2:
5942 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
5943 ; RV64ZVE32F-NEXT: j .LBB54_5
5944 ; RV64ZVE32F-NEXT: .LBB54_3:
5945 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
5946 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5947 ; RV64ZVE32F-NEXT: beqz a4, .LBB54_2
5948 ; RV64ZVE32F-NEXT: .LBB54_4: # %cond.load1
5949 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5950 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5951 ; RV64ZVE32F-NEXT: vmv.x.s a4, v10
5952 ; RV64ZVE32F-NEXT: slli a4, a4, 3
5953 ; RV64ZVE32F-NEXT: add a4, a1, a4
5954 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
5955 ; RV64ZVE32F-NEXT: .LBB54_5: # %else2
5956 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5957 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5958 ; RV64ZVE32F-NEXT: andi a6, a5, 4
5959 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5960 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5961 ; RV64ZVE32F-NEXT: beqz a6, .LBB54_10
5962 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
5963 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
5964 ; RV64ZVE32F-NEXT: slli a6, a6, 3
5965 ; RV64ZVE32F-NEXT: add a6, a1, a6
5966 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
5967 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5968 ; RV64ZVE32F-NEXT: bnez a7, .LBB54_11
5969 ; RV64ZVE32F-NEXT: .LBB54_7:
5970 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
5971 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5972 ; RV64ZVE32F-NEXT: bnez t0, .LBB54_12
5973 ; RV64ZVE32F-NEXT: .LBB54_8:
5974 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
5975 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5976 ; RV64ZVE32F-NEXT: bnez t1, .LBB54_13
5977 ; RV64ZVE32F-NEXT: .LBB54_9:
5978 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
5979 ; RV64ZVE32F-NEXT: j .LBB54_14
5980 ; RV64ZVE32F-NEXT: .LBB54_10:
5981 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
5982 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5983 ; RV64ZVE32F-NEXT: beqz a7, .LBB54_7
5984 ; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7
5985 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5986 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
5987 ; RV64ZVE32F-NEXT: slli a7, a7, 3
5988 ; RV64ZVE32F-NEXT: add a7, a1, a7
5989 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
5990 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5991 ; RV64ZVE32F-NEXT: beqz t0, .LBB54_8
5992 ; RV64ZVE32F-NEXT: .LBB54_12: # %cond.load10
5993 ; RV64ZVE32F-NEXT: vmv.x.s t0, v10
5994 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5995 ; RV64ZVE32F-NEXT: add t0, a1, t0
5996 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
5997 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5998 ; RV64ZVE32F-NEXT: beqz t1, .LBB54_9
5999 ; RV64ZVE32F-NEXT: .LBB54_13: # %cond.load13
6000 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
6001 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
6002 ; RV64ZVE32F-NEXT: slli t1, t1, 3
6003 ; RV64ZVE32F-NEXT: add t1, a1, t1
6004 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
6005 ; RV64ZVE32F-NEXT: .LBB54_14: # %else14
6006 ; RV64ZVE32F-NEXT: andi t2, a5, 64
6007 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
6008 ; RV64ZVE32F-NEXT: beqz t2, .LBB54_17
6009 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
6010 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
6011 ; RV64ZVE32F-NEXT: slli t2, t2, 3
6012 ; RV64ZVE32F-NEXT: add t2, a1, t2
6013 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
6014 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6015 ; RV64ZVE32F-NEXT: bnez a5, .LBB54_18
6016 ; RV64ZVE32F-NEXT: .LBB54_16:
6017 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
6018 ; RV64ZVE32F-NEXT: j .LBB54_19
6019 ; RV64ZVE32F-NEXT: .LBB54_17:
6020 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
6021 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6022 ; RV64ZVE32F-NEXT: beqz a5, .LBB54_16
6023 ; RV64ZVE32F-NEXT: .LBB54_18: # %cond.load19
6024 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6025 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
6026 ; RV64ZVE32F-NEXT: slli a2, a2, 3
6027 ; RV64ZVE32F-NEXT: add a1, a1, a2
6028 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
6029 ; RV64ZVE32F-NEXT: .LBB54_19: # %else20
6030 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
6031 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
6032 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
6033 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
6034 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
6035 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
6036 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
6037 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
6038 ; RV64ZVE32F-NEXT: ret
6039 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
6040 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
6041 ret <8 x i64> %v
6042 }
6044 define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6045 ; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6047 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6048 ; RV32V-NEXT: vsll.vi v8, v8, 3
6049 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
6050 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
6051 ; RV32V-NEXT: vmv.v.v v8, v12
6054 ; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6056 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
6057 ; RV64V-NEXT: vsext.vf2 v16, v8
6058 ; RV64V-NEXT: vsll.vi v8, v16, 3
6059 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
6060 ; RV64V-NEXT: vmv.v.v v8, v12
6063 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6064 ; RV32ZVE32F: # %bb.0:
6065 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6066 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
6067 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
6068 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
6069 ; RV32ZVE32F-NEXT: andi a3, t0, 1
6070 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
6071 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
6072 ; RV32ZVE32F-NEXT: beqz a3, .LBB55_7
6073 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
6074 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
6075 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
6076 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
6077 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6078 ; RV32ZVE32F-NEXT: bnez a4, .LBB55_8
6079 ; RV32ZVE32F-NEXT: .LBB55_2:
6080 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
6081 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
6082 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6083 ; RV32ZVE32F-NEXT: bnez a6, .LBB55_9
6084 ; RV32ZVE32F-NEXT: .LBB55_3:
6085 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
6086 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
6087 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6088 ; RV32ZVE32F-NEXT: bnez t1, .LBB55_10
6089 ; RV32ZVE32F-NEXT: .LBB55_4:
6090 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
6091 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
6092 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6093 ; RV32ZVE32F-NEXT: bnez t3, .LBB55_11
6094 ; RV32ZVE32F-NEXT: .LBB55_5:
6095 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
6096 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
6097 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6098 ; RV32ZVE32F-NEXT: bnez t5, .LBB55_12
6099 ; RV32ZVE32F-NEXT: .LBB55_6:
6100 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
6101 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
6102 ; RV32ZVE32F-NEXT: j .LBB55_13
6103 ; RV32ZVE32F-NEXT: .LBB55_7:
6104 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
6105 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
6106 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6107 ; RV32ZVE32F-NEXT: beqz a4, .LBB55_2
6108 ; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1
6109 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6110 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6111 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
6112 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
6113 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
6114 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6115 ; RV32ZVE32F-NEXT: beqz a6, .LBB55_3
6116 ; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4
6117 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6118 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
6119 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
6120 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
6121 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
6122 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6123 ; RV32ZVE32F-NEXT: beqz t1, .LBB55_4
6124 ; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7
6125 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6126 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
6127 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
6128 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
6129 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
6130 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6131 ; RV32ZVE32F-NEXT: beqz t3, .LBB55_5
6132 ; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10
6133 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6134 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6135 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
6136 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
6137 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
6138 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6139 ; RV32ZVE32F-NEXT: beqz t5, .LBB55_6
6140 ; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13
6141 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6142 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6143 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
6144 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
6145 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
6146 ; RV32ZVE32F-NEXT: .LBB55_13: # %else14
6147 ; RV32ZVE32F-NEXT: addi sp, sp, -16
6148 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
6149 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
6150 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
6151 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
6152 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
6153 ; RV32ZVE32F-NEXT: andi s0, t0, 64
6154 ; RV32ZVE32F-NEXT: beqz s0, .LBB55_16
6155 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
6156 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6157 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6158 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
6159 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
6160 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
6161 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6162 ; RV32ZVE32F-NEXT: bnez t0, .LBB55_17
6163 ; RV32ZVE32F-NEXT: .LBB55_15:
6164 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
6165 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
6166 ; RV32ZVE32F-NEXT: j .LBB55_18
6167 ; RV32ZVE32F-NEXT: .LBB55_16:
6168 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
6169 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
6170 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6171 ; RV32ZVE32F-NEXT: beqz t0, .LBB55_15
6172 ; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19
6173 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6174 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6175 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
6176 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
6177 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
6178 ; RV32ZVE32F-NEXT: .LBB55_18: # %else20
6179 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
6180 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
6181 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
6182 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
6183 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
6184 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
6185 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
6186 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
6187 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
6188 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
6189 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
6190 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
6191 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
6192 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
6193 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
6194 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
6195 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
6196 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
6197 ; RV32ZVE32F-NEXT: .cfi_restore s0
6198 ; RV32ZVE32F-NEXT: .cfi_restore s1
6199 ; RV32ZVE32F-NEXT: addi sp, sp, 16
6200 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
6201 ; RV32ZVE32F-NEXT: ret
6203 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6204 ; RV64ZVE32F: # %bb.0:
6205 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6206 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
6207 ; RV64ZVE32F-NEXT: andi a3, a5, 1
6208 ; RV64ZVE32F-NEXT: beqz a3, .LBB55_3
6209 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
6210 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
6211 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
6212 ; RV64ZVE32F-NEXT: slli a3, a3, 3
6213 ; RV64ZVE32F-NEXT: add a3, a1, a3
6214 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
6215 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6216 ; RV64ZVE32F-NEXT: bnez a4, .LBB55_4
6217 ; RV64ZVE32F-NEXT: .LBB55_2:
6218 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
6219 ; RV64ZVE32F-NEXT: j .LBB55_5
6220 ; RV64ZVE32F-NEXT: .LBB55_3:
6221 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
6222 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6223 ; RV64ZVE32F-NEXT: beqz a4, .LBB55_2
6224 ; RV64ZVE32F-NEXT: .LBB55_4: # %cond.load1
6225 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6226 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6227 ; RV64ZVE32F-NEXT: vmv.x.s a4, v10
6228 ; RV64ZVE32F-NEXT: slli a4, a4, 3
6229 ; RV64ZVE32F-NEXT: add a4, a1, a4
6230 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
6231 ; RV64ZVE32F-NEXT: .LBB55_5: # %else2
6232 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
6233 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6234 ; RV64ZVE32F-NEXT: andi a6, a5, 4
6235 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
6236 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
6237 ; RV64ZVE32F-NEXT: beqz a6, .LBB55_10
6238 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
6239 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
6240 ; RV64ZVE32F-NEXT: slli a6, a6, 3
6241 ; RV64ZVE32F-NEXT: add a6, a1, a6
6242 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
6243 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6244 ; RV64ZVE32F-NEXT: bnez a7, .LBB55_11
6245 ; RV64ZVE32F-NEXT: .LBB55_7:
6246 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
6247 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6248 ; RV64ZVE32F-NEXT: bnez t0, .LBB55_12
6249 ; RV64ZVE32F-NEXT: .LBB55_8:
6250 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
6251 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6252 ; RV64ZVE32F-NEXT: bnez t1, .LBB55_13
6253 ; RV64ZVE32F-NEXT: .LBB55_9:
6254 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
6255 ; RV64ZVE32F-NEXT: j .LBB55_14
6256 ; RV64ZVE32F-NEXT: .LBB55_10:
6257 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
6258 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6259 ; RV64ZVE32F-NEXT: beqz a7, .LBB55_7
6260 ; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7
6261 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6262 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
6263 ; RV64ZVE32F-NEXT: slli a7, a7, 3
6264 ; RV64ZVE32F-NEXT: add a7, a1, a7
6265 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
6266 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6267 ; RV64ZVE32F-NEXT: beqz t0, .LBB55_8
6268 ; RV64ZVE32F-NEXT: .LBB55_12: # %cond.load10
6269 ; RV64ZVE32F-NEXT: vmv.x.s t0, v10
6270 ; RV64ZVE32F-NEXT: slli t0, t0, 3
6271 ; RV64ZVE32F-NEXT: add t0, a1, t0
6272 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
6273 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6274 ; RV64ZVE32F-NEXT: beqz t1, .LBB55_9
6275 ; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load13
6276 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
6277 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
6278 ; RV64ZVE32F-NEXT: slli t1, t1, 3
6279 ; RV64ZVE32F-NEXT: add t1, a1, t1
6280 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
6281 ; RV64ZVE32F-NEXT: .LBB55_14: # %else14
6282 ; RV64ZVE32F-NEXT: andi t2, a5, 64
6283 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
6284 ; RV64ZVE32F-NEXT: beqz t2, .LBB55_17
6285 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
6286 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
6287 ; RV64ZVE32F-NEXT: slli t2, t2, 3
6288 ; RV64ZVE32F-NEXT: add t2, a1, t2
6289 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
6290 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6291 ; RV64ZVE32F-NEXT: bnez a5, .LBB55_18
6292 ; RV64ZVE32F-NEXT: .LBB55_16:
6293 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
6294 ; RV64ZVE32F-NEXT: j .LBB55_19
6295 ; RV64ZVE32F-NEXT: .LBB55_17:
6296 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
6297 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6298 ; RV64ZVE32F-NEXT: beqz a5, .LBB55_16
6299 ; RV64ZVE32F-NEXT: .LBB55_18: # %cond.load19
6300 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6301 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
6302 ; RV64ZVE32F-NEXT: slli a2, a2, 3
6303 ; RV64ZVE32F-NEXT: add a1, a1, a2
6304 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
6305 ; RV64ZVE32F-NEXT: .LBB55_19: # %else20
6306 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
6307 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
6308 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
6309 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
6310 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
6311 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
6312 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
6313 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
6314 ; RV64ZVE32F-NEXT: ret
6315 %eidxs = sext <8 x i32> %idxs to <8 x i64>
6316 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
6317 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
6318 ret <8 x i64> %v
6319 }
6321 define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6322 ; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6324 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6325 ; RV32V-NEXT: vsll.vi v8, v8, 3
6326 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
6327 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
6328 ; RV32V-NEXT: vmv.v.v v8, v12
6331 ; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6333 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
6334 ; RV64V-NEXT: vzext.vf2 v16, v8
6335 ; RV64V-NEXT: vsll.vi v8, v16, 3
6336 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
6337 ; RV64V-NEXT: vmv.v.v v8, v12
6340 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6341 ; RV32ZVE32F: # %bb.0:
6342 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6343 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
6344 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
6345 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
6346 ; RV32ZVE32F-NEXT: andi a3, t0, 1
6347 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
6348 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
6349 ; RV32ZVE32F-NEXT: beqz a3, .LBB56_7
6350 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
6351 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
6352 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
6353 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
6354 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6355 ; RV32ZVE32F-NEXT: bnez a4, .LBB56_8
6356 ; RV32ZVE32F-NEXT: .LBB56_2:
6357 ; RV32ZVE32F-NEXT: lw a4, 8(a2)
6358 ; RV32ZVE32F-NEXT: lw a5, 12(a2)
6359 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6360 ; RV32ZVE32F-NEXT: bnez a6, .LBB56_9
6361 ; RV32ZVE32F-NEXT: .LBB56_3:
6362 ; RV32ZVE32F-NEXT: lw a6, 16(a2)
6363 ; RV32ZVE32F-NEXT: lw a7, 20(a2)
6364 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6365 ; RV32ZVE32F-NEXT: bnez t1, .LBB56_10
6366 ; RV32ZVE32F-NEXT: .LBB56_4:
6367 ; RV32ZVE32F-NEXT: lw t1, 24(a2)
6368 ; RV32ZVE32F-NEXT: lw t2, 28(a2)
6369 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6370 ; RV32ZVE32F-NEXT: bnez t3, .LBB56_11
6371 ; RV32ZVE32F-NEXT: .LBB56_5:
6372 ; RV32ZVE32F-NEXT: lw t3, 32(a2)
6373 ; RV32ZVE32F-NEXT: lw t4, 36(a2)
6374 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6375 ; RV32ZVE32F-NEXT: bnez t5, .LBB56_12
6376 ; RV32ZVE32F-NEXT: .LBB56_6:
6377 ; RV32ZVE32F-NEXT: lw t5, 40(a2)
6378 ; RV32ZVE32F-NEXT: lw t6, 44(a2)
6379 ; RV32ZVE32F-NEXT: j .LBB56_13
6380 ; RV32ZVE32F-NEXT: .LBB56_7:
6381 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
6382 ; RV32ZVE32F-NEXT: lw a3, 4(a2)
6383 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6384 ; RV32ZVE32F-NEXT: beqz a4, .LBB56_2
6385 ; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1
6386 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6387 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6388 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
6389 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
6390 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
6391 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6392 ; RV32ZVE32F-NEXT: beqz a6, .LBB56_3
6393 ; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4
6394 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6395 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
6396 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
6397 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
6398 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
6399 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6400 ; RV32ZVE32F-NEXT: beqz t1, .LBB56_4
6401 ; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7
6402 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6403 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
6404 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
6405 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
6406 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
6407 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6408 ; RV32ZVE32F-NEXT: beqz t3, .LBB56_5
6409 ; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10
6410 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6411 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6412 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
6413 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
6414 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
6415 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6416 ; RV32ZVE32F-NEXT: beqz t5, .LBB56_6
6417 ; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13
6418 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6419 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6420 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
6421 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
6422 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
6423 ; RV32ZVE32F-NEXT: .LBB56_13: # %else14
6424 ; RV32ZVE32F-NEXT: addi sp, sp, -16
6425 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
6426 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
6427 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
6428 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
6429 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
6430 ; RV32ZVE32F-NEXT: andi s0, t0, 64
6431 ; RV32ZVE32F-NEXT: beqz s0, .LBB56_16
6432 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
6433 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6434 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6435 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
6436 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
6437 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
6438 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6439 ; RV32ZVE32F-NEXT: bnez t0, .LBB56_17
6440 ; RV32ZVE32F-NEXT: .LBB56_15:
6441 ; RV32ZVE32F-NEXT: lw t0, 56(a2)
6442 ; RV32ZVE32F-NEXT: lw a2, 60(a2)
6443 ; RV32ZVE32F-NEXT: j .LBB56_18
6444 ; RV32ZVE32F-NEXT: .LBB56_16:
6445 ; RV32ZVE32F-NEXT: lw s0, 48(a2)
6446 ; RV32ZVE32F-NEXT: lw s1, 52(a2)
6447 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6448 ; RV32ZVE32F-NEXT: beqz t0, .LBB56_15
6449 ; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19
6450 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6451 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6452 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
6453 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
6454 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
6455 ; RV32ZVE32F-NEXT: .LBB56_18: # %else20
6456 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
6457 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
6458 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
6459 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
6460 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
6461 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
6462 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
6463 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
6464 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
6465 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
6466 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
6467 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
6468 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
6469 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
6470 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
6471 ; RV32ZVE32F-NEXT: sw a2, 60(a0)
6472 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
6473 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
6474 ; RV32ZVE32F-NEXT: .cfi_restore s0
6475 ; RV32ZVE32F-NEXT: .cfi_restore s1
6476 ; RV32ZVE32F-NEXT: addi sp, sp, 16
6477 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
6478 ; RV32ZVE32F-NEXT: ret
6480 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6481 ; RV64ZVE32F: # %bb.0:
6482 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6483 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
6484 ; RV64ZVE32F-NEXT: andi a3, a5, 1
6485 ; RV64ZVE32F-NEXT: beqz a3, .LBB56_3
6486 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
6487 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
6488 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
6489 ; RV64ZVE32F-NEXT: slli a3, a3, 32
6490 ; RV64ZVE32F-NEXT: srli a3, a3, 29
6491 ; RV64ZVE32F-NEXT: add a3, a1, a3
6492 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
6493 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6494 ; RV64ZVE32F-NEXT: bnez a4, .LBB56_4
6495 ; RV64ZVE32F-NEXT: .LBB56_2:
6496 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
6497 ; RV64ZVE32F-NEXT: j .LBB56_5
6498 ; RV64ZVE32F-NEXT: .LBB56_3:
6499 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
6500 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6501 ; RV64ZVE32F-NEXT: beqz a4, .LBB56_2
6502 ; RV64ZVE32F-NEXT: .LBB56_4: # %cond.load1
6503 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6504 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6505 ; RV64ZVE32F-NEXT: vmv.x.s a4, v10
6506 ; RV64ZVE32F-NEXT: slli a4, a4, 32
6507 ; RV64ZVE32F-NEXT: srli a4, a4, 29
6508 ; RV64ZVE32F-NEXT: add a4, a1, a4
6509 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
6510 ; RV64ZVE32F-NEXT: .LBB56_5: # %else2
6511 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
6512 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6513 ; RV64ZVE32F-NEXT: andi a6, a5, 4
6514 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
6515 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
6516 ; RV64ZVE32F-NEXT: beqz a6, .LBB56_10
6517 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
6518 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
6519 ; RV64ZVE32F-NEXT: slli a6, a6, 32
6520 ; RV64ZVE32F-NEXT: srli a6, a6, 29
6521 ; RV64ZVE32F-NEXT: add a6, a1, a6
6522 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
6523 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6524 ; RV64ZVE32F-NEXT: bnez a7, .LBB56_11
6525 ; RV64ZVE32F-NEXT: .LBB56_7:
6526 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
6527 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6528 ; RV64ZVE32F-NEXT: bnez t0, .LBB56_12
6529 ; RV64ZVE32F-NEXT: .LBB56_8:
6530 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
6531 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6532 ; RV64ZVE32F-NEXT: bnez t1, .LBB56_13
6533 ; RV64ZVE32F-NEXT: .LBB56_9:
6534 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
6535 ; RV64ZVE32F-NEXT: j .LBB56_14
6536 ; RV64ZVE32F-NEXT: .LBB56_10:
6537 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
6538 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6539 ; RV64ZVE32F-NEXT: beqz a7, .LBB56_7
6540 ; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7
6541 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6542 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
6543 ; RV64ZVE32F-NEXT: slli a7, a7, 32
6544 ; RV64ZVE32F-NEXT: srli a7, a7, 29
6545 ; RV64ZVE32F-NEXT: add a7, a1, a7
6546 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
6547 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6548 ; RV64ZVE32F-NEXT: beqz t0, .LBB56_8
6549 ; RV64ZVE32F-NEXT: .LBB56_12: # %cond.load10
6550 ; RV64ZVE32F-NEXT: vmv.x.s t0, v10
6551 ; RV64ZVE32F-NEXT: slli t0, t0, 32
6552 ; RV64ZVE32F-NEXT: srli t0, t0, 29
6553 ; RV64ZVE32F-NEXT: add t0, a1, t0
6554 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
6555 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6556 ; RV64ZVE32F-NEXT: beqz t1, .LBB56_9
6557 ; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load13
6558 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
6559 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
6560 ; RV64ZVE32F-NEXT: slli t1, t1, 32
6561 ; RV64ZVE32F-NEXT: srli t1, t1, 29
6562 ; RV64ZVE32F-NEXT: add t1, a1, t1
6563 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
6564 ; RV64ZVE32F-NEXT: .LBB56_14: # %else14
6565 ; RV64ZVE32F-NEXT: andi t2, a5, 64
6566 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
6567 ; RV64ZVE32F-NEXT: beqz t2, .LBB56_17
6568 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
6569 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
6570 ; RV64ZVE32F-NEXT: slli t2, t2, 32
6571 ; RV64ZVE32F-NEXT: srli t2, t2, 29
6572 ; RV64ZVE32F-NEXT: add t2, a1, t2
6573 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
6574 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6575 ; RV64ZVE32F-NEXT: bnez a5, .LBB56_18
6576 ; RV64ZVE32F-NEXT: .LBB56_16:
6577 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
6578 ; RV64ZVE32F-NEXT: j .LBB56_19
6579 ; RV64ZVE32F-NEXT: .LBB56_17:
6580 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
6581 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6582 ; RV64ZVE32F-NEXT: beqz a5, .LBB56_16
6583 ; RV64ZVE32F-NEXT: .LBB56_18: # %cond.load19
6584 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6585 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
6586 ; RV64ZVE32F-NEXT: slli a2, a2, 32
6587 ; RV64ZVE32F-NEXT: srli a2, a2, 29
6588 ; RV64ZVE32F-NEXT: add a1, a1, a2
6589 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
6590 ; RV64ZVE32F-NEXT: .LBB56_19: # %else20
6591 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
6592 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
6593 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
6594 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
6595 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
6596 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
6597 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
6598 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
6599 ; RV64ZVE32F-NEXT: ret
6600 %eidxs = zext <8 x i32> %idxs to <8 x i64>
6601 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
6602 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
6603 ret <8 x i64> %v
6604 }
6606 define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6607 ; RV32V-LABEL: mgather_baseidx_v8i64:
6609 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6610 ; RV32V-NEXT: vnsrl.wi v16, v8, 0
6611 ; RV32V-NEXT: vsll.vi v8, v16, 3
6612 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
6613 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
6614 ; RV32V-NEXT: vmv.v.v v8, v12
6617 ; RV64V-LABEL: mgather_baseidx_v8i64:
6619 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
6620 ; RV64V-NEXT: vsll.vi v8, v8, 3
6621 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
6622 ; RV64V-NEXT: vmv.v.v v8, v12
6625 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
6626 ; RV32ZVE32F: # %bb.0:
6627 ; RV32ZVE32F-NEXT: lw a4, 32(a2)
6628 ; RV32ZVE32F-NEXT: lw a5, 40(a2)
6629 ; RV32ZVE32F-NEXT: lw a6, 48(a2)
6630 ; RV32ZVE32F-NEXT: lw a7, 56(a2)
6631 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
6632 ; RV32ZVE32F-NEXT: lw t1, 8(a2)
6633 ; RV32ZVE32F-NEXT: lw t2, 16(a2)
6634 ; RV32ZVE32F-NEXT: lw a2, 24(a2)
6635 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6636 ; RV32ZVE32F-NEXT: vmv.v.x v8, t0
6637 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
6638 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
6639 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
6640 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
6641 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2
6642 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
6643 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
6644 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
6645 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
6646 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
6647 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
6648 ; RV32ZVE32F-NEXT: andi a2, t0, 1
6649 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
6650 ; RV32ZVE32F-NEXT: beqz a2, .LBB57_7
6651 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
6652 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
6653 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
6654 ; RV32ZVE32F-NEXT: lw a2, 4(a2)
6655 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6656 ; RV32ZVE32F-NEXT: bnez a4, .LBB57_8
6657 ; RV32ZVE32F-NEXT: .LBB57_2:
6658 ; RV32ZVE32F-NEXT: lw a4, 8(a3)
6659 ; RV32ZVE32F-NEXT: lw a5, 12(a3)
6660 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6661 ; RV32ZVE32F-NEXT: bnez a6, .LBB57_9
6662 ; RV32ZVE32F-NEXT: .LBB57_3:
6663 ; RV32ZVE32F-NEXT: lw a6, 16(a3)
6664 ; RV32ZVE32F-NEXT: lw a7, 20(a3)
6665 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6666 ; RV32ZVE32F-NEXT: bnez t1, .LBB57_10
6667 ; RV32ZVE32F-NEXT: .LBB57_4:
6668 ; RV32ZVE32F-NEXT: lw t1, 24(a3)
6669 ; RV32ZVE32F-NEXT: lw t2, 28(a3)
6670 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6671 ; RV32ZVE32F-NEXT: bnez t3, .LBB57_11
6672 ; RV32ZVE32F-NEXT: .LBB57_5:
6673 ; RV32ZVE32F-NEXT: lw t3, 32(a3)
6674 ; RV32ZVE32F-NEXT: lw t4, 36(a3)
6675 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6676 ; RV32ZVE32F-NEXT: bnez t5, .LBB57_12
6677 ; RV32ZVE32F-NEXT: .LBB57_6:
6678 ; RV32ZVE32F-NEXT: lw t5, 40(a3)
6679 ; RV32ZVE32F-NEXT: lw t6, 44(a3)
6680 ; RV32ZVE32F-NEXT: j .LBB57_13
6681 ; RV32ZVE32F-NEXT: .LBB57_7:
6682 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
6683 ; RV32ZVE32F-NEXT: lw a2, 4(a3)
6684 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6685 ; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
6686 ; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1
6687 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6688 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6689 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
6690 ; RV32ZVE32F-NEXT: lw a4, 0(a5)
6691 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
6692 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6693 ; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
6694 ; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4
6695 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6696 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
6697 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
6698 ; RV32ZVE32F-NEXT: lw a6, 0(a7)
6699 ; RV32ZVE32F-NEXT: lw a7, 4(a7)
6700 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6701 ; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
6702 ; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7
6703 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6704 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
6705 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
6706 ; RV32ZVE32F-NEXT: lw t1, 0(t2)
6707 ; RV32ZVE32F-NEXT: lw t2, 4(t2)
6708 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6709 ; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
6710 ; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10
6711 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6712 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6713 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
6714 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
6715 ; RV32ZVE32F-NEXT: lw t4, 4(t4)
6716 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6717 ; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
6718 ; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13
6719 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6720 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6721 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
6722 ; RV32ZVE32F-NEXT: lw t5, 0(t6)
6723 ; RV32ZVE32F-NEXT: lw t6, 4(t6)
6724 ; RV32ZVE32F-NEXT: .LBB57_13: # %else14
6725 ; RV32ZVE32F-NEXT: addi sp, sp, -16
6726 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
6727 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
6728 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
6729 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
6730 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
6731 ; RV32ZVE32F-NEXT: andi s0, t0, 64
6732 ; RV32ZVE32F-NEXT: beqz s0, .LBB57_16
6733 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
6734 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6735 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6736 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
6737 ; RV32ZVE32F-NEXT: lw s0, 0(s1)
6738 ; RV32ZVE32F-NEXT: lw s1, 4(s1)
6739 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6740 ; RV32ZVE32F-NEXT: bnez t0, .LBB57_17
6741 ; RV32ZVE32F-NEXT: .LBB57_15:
6742 ; RV32ZVE32F-NEXT: lw t0, 56(a3)
6743 ; RV32ZVE32F-NEXT: lw a3, 60(a3)
6744 ; RV32ZVE32F-NEXT: j .LBB57_18
6745 ; RV32ZVE32F-NEXT: .LBB57_16:
6746 ; RV32ZVE32F-NEXT: lw s0, 48(a3)
6747 ; RV32ZVE32F-NEXT: lw s1, 52(a3)
6748 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6749 ; RV32ZVE32F-NEXT: beqz t0, .LBB57_15
6750 ; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19
6751 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6752 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6753 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
6754 ; RV32ZVE32F-NEXT: lw t0, 0(a3)
6755 ; RV32ZVE32F-NEXT: lw a3, 4(a3)
6756 ; RV32ZVE32F-NEXT: .LBB57_18: # %else20
6757 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
6758 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
6759 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
6760 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
6761 ; RV32ZVE32F-NEXT: sw a6, 16(a0)
6762 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
6763 ; RV32ZVE32F-NEXT: sw t1, 24(a0)
6764 ; RV32ZVE32F-NEXT: sw t2, 28(a0)
6765 ; RV32ZVE32F-NEXT: sw t3, 32(a0)
6766 ; RV32ZVE32F-NEXT: sw t4, 36(a0)
6767 ; RV32ZVE32F-NEXT: sw t5, 40(a0)
6768 ; RV32ZVE32F-NEXT: sw t6, 44(a0)
6769 ; RV32ZVE32F-NEXT: sw s0, 48(a0)
6770 ; RV32ZVE32F-NEXT: sw s1, 52(a0)
6771 ; RV32ZVE32F-NEXT: sw t0, 56(a0)
6772 ; RV32ZVE32F-NEXT: sw a3, 60(a0)
6773 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
6774 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
6775 ; RV32ZVE32F-NEXT: .cfi_restore s0
6776 ; RV32ZVE32F-NEXT: .cfi_restore s1
6777 ; RV32ZVE32F-NEXT: addi sp, sp, 16
6778 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
6779 ; RV32ZVE32F-NEXT: ret
6781 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i64:
6782 ; RV64ZVE32F: # %bb.0:
6783 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6784 ; RV64ZVE32F-NEXT: vmv.x.s a7, v0
6785 ; RV64ZVE32F-NEXT: andi a4, a7, 1
6786 ; RV64ZVE32F-NEXT: beqz a4, .LBB57_9
6787 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
6788 ; RV64ZVE32F-NEXT: ld a4, 0(a2)
6789 ; RV64ZVE32F-NEXT: slli a4, a4, 3
6790 ; RV64ZVE32F-NEXT: add a4, a1, a4
6791 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
6792 ; RV64ZVE32F-NEXT: andi a5, a7, 2
6793 ; RV64ZVE32F-NEXT: bnez a5, .LBB57_10
6794 ; RV64ZVE32F-NEXT: .LBB57_2:
6795 ; RV64ZVE32F-NEXT: ld a5, 8(a3)
6796 ; RV64ZVE32F-NEXT: andi a6, a7, 4
6797 ; RV64ZVE32F-NEXT: bnez a6, .LBB57_11
6798 ; RV64ZVE32F-NEXT: .LBB57_3:
6799 ; RV64ZVE32F-NEXT: ld a6, 16(a3)
6800 ; RV64ZVE32F-NEXT: andi t0, a7, 8
6801 ; RV64ZVE32F-NEXT: bnez t0, .LBB57_12
6802 ; RV64ZVE32F-NEXT: .LBB57_4:
6803 ; RV64ZVE32F-NEXT: ld t0, 24(a3)
6804 ; RV64ZVE32F-NEXT: andi t1, a7, 16
6805 ; RV64ZVE32F-NEXT: bnez t1, .LBB57_13
6806 ; RV64ZVE32F-NEXT: .LBB57_5:
6807 ; RV64ZVE32F-NEXT: ld t1, 32(a3)
6808 ; RV64ZVE32F-NEXT: andi t2, a7, 32
6809 ; RV64ZVE32F-NEXT: bnez t2, .LBB57_14
6810 ; RV64ZVE32F-NEXT: .LBB57_6:
6811 ; RV64ZVE32F-NEXT: ld t2, 40(a3)
6812 ; RV64ZVE32F-NEXT: andi t3, a7, 64
6813 ; RV64ZVE32F-NEXT: bnez t3, .LBB57_15
6814 ; RV64ZVE32F-NEXT: .LBB57_7:
6815 ; RV64ZVE32F-NEXT: ld t3, 48(a3)
6816 ; RV64ZVE32F-NEXT: andi a7, a7, -128
6817 ; RV64ZVE32F-NEXT: bnez a7, .LBB57_16
6818 ; RV64ZVE32F-NEXT: .LBB57_8:
6819 ; RV64ZVE32F-NEXT: ld a1, 56(a3)
6820 ; RV64ZVE32F-NEXT: j .LBB57_17
6821 ; RV64ZVE32F-NEXT: .LBB57_9:
6822 ; RV64ZVE32F-NEXT: ld a4, 0(a3)
6823 ; RV64ZVE32F-NEXT: andi a5, a7, 2
6824 ; RV64ZVE32F-NEXT: beqz a5, .LBB57_2
6825 ; RV64ZVE32F-NEXT: .LBB57_10: # %cond.load1
6826 ; RV64ZVE32F-NEXT: ld a5, 8(a2)
6827 ; RV64ZVE32F-NEXT: slli a5, a5, 3
6828 ; RV64ZVE32F-NEXT: add a5, a1, a5
6829 ; RV64ZVE32F-NEXT: ld a5, 0(a5)
6830 ; RV64ZVE32F-NEXT: andi a6, a7, 4
6831 ; RV64ZVE32F-NEXT: beqz a6, .LBB57_3
6832 ; RV64ZVE32F-NEXT: .LBB57_11: # %cond.load4
6833 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
6834 ; RV64ZVE32F-NEXT: slli a6, a6, 3
6835 ; RV64ZVE32F-NEXT: add a6, a1, a6
6836 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
6837 ; RV64ZVE32F-NEXT: andi t0, a7, 8
6838 ; RV64ZVE32F-NEXT: beqz t0, .LBB57_4
6839 ; RV64ZVE32F-NEXT: .LBB57_12: # %cond.load7
6840 ; RV64ZVE32F-NEXT: ld t0, 24(a2)
6841 ; RV64ZVE32F-NEXT: slli t0, t0, 3
6842 ; RV64ZVE32F-NEXT: add t0, a1, t0
6843 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
6844 ; RV64ZVE32F-NEXT: andi t1, a7, 16
6845 ; RV64ZVE32F-NEXT: beqz t1, .LBB57_5
6846 ; RV64ZVE32F-NEXT: .LBB57_13: # %cond.load10
6847 ; RV64ZVE32F-NEXT: ld t1, 32(a2)
6848 ; RV64ZVE32F-NEXT: slli t1, t1, 3
6849 ; RV64ZVE32F-NEXT: add t1, a1, t1
6850 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
6851 ; RV64ZVE32F-NEXT: andi t2, a7, 32
6852 ; RV64ZVE32F-NEXT: beqz t2, .LBB57_6
6853 ; RV64ZVE32F-NEXT: .LBB57_14: # %cond.load13
6854 ; RV64ZVE32F-NEXT: ld t2, 40(a2)
6855 ; RV64ZVE32F-NEXT: slli t2, t2, 3
6856 ; RV64ZVE32F-NEXT: add t2, a1, t2
6857 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
6858 ; RV64ZVE32F-NEXT: andi t3, a7, 64
6859 ; RV64ZVE32F-NEXT: beqz t3, .LBB57_7
6860 ; RV64ZVE32F-NEXT: .LBB57_15: # %cond.load16
6861 ; RV64ZVE32F-NEXT: ld t3, 48(a2)
6862 ; RV64ZVE32F-NEXT: slli t3, t3, 3
6863 ; RV64ZVE32F-NEXT: add t3, a1, t3
6864 ; RV64ZVE32F-NEXT: ld t3, 0(t3)
6865 ; RV64ZVE32F-NEXT: andi a7, a7, -128
6866 ; RV64ZVE32F-NEXT: beqz a7, .LBB57_8
6867 ; RV64ZVE32F-NEXT: .LBB57_16: # %cond.load19
6868 ; RV64ZVE32F-NEXT: ld a2, 56(a2)
6869 ; RV64ZVE32F-NEXT: slli a2, a2, 3
6870 ; RV64ZVE32F-NEXT: add a1, a1, a2
6871 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
6872 ; RV64ZVE32F-NEXT: .LBB57_17: # %else20
6873 ; RV64ZVE32F-NEXT: sd a4, 0(a0)
6874 ; RV64ZVE32F-NEXT: sd a5, 8(a0)
6875 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
6876 ; RV64ZVE32F-NEXT: sd t0, 24(a0)
6877 ; RV64ZVE32F-NEXT: sd t1, 32(a0)
6878 ; RV64ZVE32F-NEXT: sd t2, 40(a0)
6879 ; RV64ZVE32F-NEXT: sd t3, 48(a0)
6880 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
6881 ; RV64ZVE32F-NEXT: ret
6882 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
6883 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
6884 ret <8 x i64> %v
6885 }
6887 declare <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x bfloat>)
6889 define <1 x bfloat> @mgather_v1bf16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x bfloat> %passthru) {
6890 ; RV32V-LABEL: mgather_v1bf16:
6892 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
6893 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
6894 ; RV32V-NEXT: vmv1r.v v8, v9
6897 ; RV64V-LABEL: mgather_v1bf16:
6899 ; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
6900 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
6901 ; RV64V-NEXT: vmv1r.v v8, v9
6904 ; RV32ZVE32F-LABEL: mgather_v1bf16:
6905 ; RV32ZVE32F: # %bb.0:
6906 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
6907 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
6908 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
6909 ; RV32ZVE32F-NEXT: ret
6911 ; RV64ZVE32F-LABEL: mgather_v1bf16:
6912 ; RV64ZVE32F: # %bb.0:
6913 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
6914 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
6915 ; RV64ZVE32F-NEXT: bnez a1, .LBB58_2
6916 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
6917 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6918 ; RV64ZVE32F-NEXT: vle16.v v8, (a0)
6919 ; RV64ZVE32F-NEXT: .LBB58_2: # %else
6920 ; RV64ZVE32F-NEXT: ret
6921 %v = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x bfloat> %passthru)
6922 ret <1 x bfloat> %v
6923 }
6925 declare <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x bfloat>)
6927 define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %passthru) {
6928 ; RV32V-LABEL: mgather_v2bf16:
6930 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
6931 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
6932 ; RV32V-NEXT: vmv1r.v v8, v9
6935 ; RV64V-LABEL: mgather_v2bf16:
6937 ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
6938 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
6939 ; RV64V-NEXT: vmv1r.v v8, v9
6942 ; RV32ZVE32F-LABEL: mgather_v2bf16:
6943 ; RV32ZVE32F: # %bb.0:
6944 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
6945 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
6946 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
6947 ; RV32ZVE32F-NEXT: ret
6949 ; RV64ZVE32F-LABEL: mgather_v2bf16:
6950 ; RV64ZVE32F: # %bb.0:
6951 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6952 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
6953 ; RV64ZVE32F-NEXT: andi a3, a2, 1
6954 ; RV64ZVE32F-NEXT: bnez a3, .LBB59_3
6955 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6956 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6957 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_4
6958 ; RV64ZVE32F-NEXT: .LBB59_2: # %else2
6959 ; RV64ZVE32F-NEXT: ret
6960 ; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
6961 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
6962 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
6963 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
6964 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6965 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
6966 ; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
6967 ; RV64ZVE32F-NEXT: lh a0, 0(a1)
6968 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
6969 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
6970 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
6971 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
6972 ; RV64ZVE32F-NEXT: ret
6973 %v = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x bfloat> %passthru)
6974 ret <2 x bfloat> %v
6975 }
6977 declare <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x bfloat>)
6979 define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %passthru) {
6980 ; RV32-LABEL: mgather_v4bf16:
6982 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
6983 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
6984 ; RV32-NEXT: vmv1r.v v8, v9
6987 ; RV64V-LABEL: mgather_v4bf16:
6989 ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
6990 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
6991 ; RV64V-NEXT: vmv1r.v v8, v10
6994 ; RV64ZVE32F-LABEL: mgather_v4bf16:
6995 ; RV64ZVE32F: # %bb.0:
6996 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6997 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6998 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6999 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_5
7000 ; RV64ZVE32F-NEXT: # %bb.1: # %else
7001 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7002 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_6
7003 ; RV64ZVE32F-NEXT: .LBB60_2: # %else2
7004 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7005 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_7
7006 ; RV64ZVE32F-NEXT: .LBB60_3: # %else5
7007 ; RV64ZVE32F-NEXT: andi a1, a1, 8
7008 ; RV64ZVE32F-NEXT: bnez a1, .LBB60_8
7009 ; RV64ZVE32F-NEXT: .LBB60_4: # %else8
7010 ; RV64ZVE32F-NEXT: ret
7011 ; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
7012 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
7013 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7014 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7015 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7016 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7017 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
7018 ; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
7019 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
7020 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7021 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7022 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7023 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
7024 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
7025 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7026 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
7027 ; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4
7028 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
7029 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7030 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
7031 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7032 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
7033 ; RV64ZVE32F-NEXT: andi a1, a1, 8
7034 ; RV64ZVE32F-NEXT: beqz a1, .LBB60_4
7035 ; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7
7036 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
7037 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
7038 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7039 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
7040 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
7041 ; RV64ZVE32F-NEXT: ret
7042 %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x bfloat> %passthru)
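; With an all-ones mask the gather is unconditional: RV64ZVE32F loads all four elements with scalar loads and assembles the result with slide1down.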
7046 define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
7047 ; RV32-LABEL: mgather_truemask_v4bf16:
7049 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7050 ; RV32-NEXT: vluxei32.v v9, (zero), v8
7051 ; RV32-NEXT: vmv1r.v v8, v9
7054 ; RV64V-LABEL: mgather_truemask_v4bf16:
7056 ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7057 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
7058 ; RV64V-NEXT: vmv1r.v v8, v10
7061 ; RV64ZVE32F-LABEL: mgather_truemask_v4bf16:
7062 ; RV64ZVE32F: # %bb.0:
7063 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
7064 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
7065 ; RV64ZVE32F-NEXT: ld a3, 16(a0)
7066 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
7067 ; RV64ZVE32F-NEXT: lh a1, 0(a1)
7068 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7069 ; RV64ZVE32F-NEXT: lh a3, 0(a3)
7070 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
7071 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7072 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
7073 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
7074 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
7075 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
7076 ; RV64ZVE32F-NEXT: ret
7077 %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x bfloat> %passthru)
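; With an all-zeros mask the gather folds away and the passthru operand is returned unchanged.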
7081 define <4 x bfloat> @mgather_falsemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
7082 ; RV32-LABEL: mgather_falsemask_v4bf16:
7084 ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7085 ; RV32-NEXT: vmv1r.v v8, v9
7088 ; RV64V-LABEL: mgather_falsemask_v4bf16:
7090 ; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7091 ; RV64V-NEXT: vmv1r.v v8, v10
7094 ; RV64ZVE32F-LABEL: mgather_falsemask_v4bf16:
7095 ; RV64ZVE32F: # %bb.0:
7096 ; RV64ZVE32F-NEXT: ret
7097 %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x bfloat> %passthru)
7101 declare <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x bfloat>)
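; Masked gather of 8 x bfloat; RV64ZVE32F expands to a branch and scalar load per mask bit, inserting each element with vslideup.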
7103 define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %passthru) {
7104 ; RV32-LABEL: mgather_v8bf16:
7106 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
7107 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
7108 ; RV32-NEXT: vmv.v.v v8, v10
7111 ; RV64V-LABEL: mgather_v8bf16:
7113 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
7114 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
7115 ; RV64V-NEXT: vmv.v.v v8, v12
7118 ; RV64ZVE32F-LABEL: mgather_v8bf16:
7119 ; RV64ZVE32F: # %bb.0:
7120 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7121 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7122 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7123 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_9
7124 ; RV64ZVE32F-NEXT: # %bb.1: # %else
7125 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7126 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_10
7127 ; RV64ZVE32F-NEXT: .LBB63_2: # %else2
7128 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7129 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_11
7130 ; RV64ZVE32F-NEXT: .LBB63_3: # %else5
7131 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7132 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_12
7133 ; RV64ZVE32F-NEXT: .LBB63_4: # %else8
7134 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7135 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_13
7136 ; RV64ZVE32F-NEXT: .LBB63_5: # %else11
7137 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7138 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_14
7139 ; RV64ZVE32F-NEXT: .LBB63_6: # %else14
7140 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7141 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_15
7142 ; RV64ZVE32F-NEXT: .LBB63_7: # %else17
7143 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7144 ; RV64ZVE32F-NEXT: bnez a1, .LBB63_16
7145 ; RV64ZVE32F-NEXT: .LBB63_8: # %else20
7146 ; RV64ZVE32F-NEXT: ret
7147 ; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
7148 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
7149 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7150 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7151 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7152 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7153 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
7154 ; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
7155 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
7156 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7157 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7158 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7159 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7160 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
7161 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7162 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
7163 ; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4
7164 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
7165 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7166 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7167 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7168 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
7169 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7170 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_4
7171 ; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7
7172 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
7173 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7174 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7175 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7176 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
7177 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7178 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_5
7179 ; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10
7180 ; RV64ZVE32F-NEXT: ld a2, 32(a0)
7181 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7182 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7183 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7184 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
7185 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7186 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_6
7187 ; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13
7188 ; RV64ZVE32F-NEXT: ld a2, 40(a0)
7189 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7190 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7191 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7192 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
7193 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7194 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_7
7195 ; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16
7196 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
7197 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7198 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7199 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7200 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
7201 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7202 ; RV64ZVE32F-NEXT: beqz a1, .LBB63_8
7203 ; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19
7204 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
7205 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
7206 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7207 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
7208 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
7209 ; RV64ZVE32F-NEXT: ret
7210 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
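; Gather from a common base with i8 element indices: the GEP sign-extends the indices and scales them by the 2-byte element size.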
7214 define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
7215 ; RV32-LABEL: mgather_baseidx_v8i8_v8bf16:
7217 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7218 ; RV32-NEXT: vsext.vf4 v10, v8
7219 ; RV32-NEXT: vadd.vv v10, v10, v10
7220 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7221 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
7222 ; RV32-NEXT: vmv.v.v v8, v9
7225 ; RV64V-LABEL: mgather_baseidx_v8i8_v8bf16:
7227 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7228 ; RV64V-NEXT: vsext.vf8 v12, v8
7229 ; RV64V-NEXT: vadd.vv v12, v12, v12
7230 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7231 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
7232 ; RV64V-NEXT: vmv.v.v v8, v9
7235 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8bf16:
7236 ; RV64ZVE32F: # %bb.0:
7237 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7238 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7239 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7240 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_2
7241 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7242 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7243 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7244 ; RV64ZVE32F-NEXT: add a2, a0, a2
7245 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7246 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7247 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7248 ; RV64ZVE32F-NEXT: .LBB64_2: # %else
7249 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7250 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_4
7251 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
7252 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7253 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7254 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7255 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7256 ; RV64ZVE32F-NEXT: add a2, a0, a2
7257 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7258 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7259 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
7260 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7261 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
7262 ; RV64ZVE32F-NEXT: .LBB64_4: # %else2
7263 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7264 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7265 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7266 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7267 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
7268 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_14
7269 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
7270 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7271 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_15
7272 ; RV64ZVE32F-NEXT: .LBB64_6: # %else8
7273 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7274 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_16
7275 ; RV64ZVE32F-NEXT: .LBB64_7: # %else11
7276 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7277 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_9
7278 ; RV64ZVE32F-NEXT: .LBB64_8: # %cond.load13
7279 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7280 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
7281 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7282 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7283 ; RV64ZVE32F-NEXT: add a2, a0, a2
7284 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7285 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7286 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7287 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7288 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
7289 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14
7290 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7291 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7292 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
7293 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_11
7294 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
7295 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7296 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7297 ; RV64ZVE32F-NEXT: add a2, a0, a2
7298 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7299 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7300 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
7301 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7302 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
7303 ; RV64ZVE32F-NEXT: .LBB64_11: # %else17
7304 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7305 ; RV64ZVE32F-NEXT: beqz a1, .LBB64_13
7306 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
7307 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7308 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7309 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
7310 ; RV64ZVE32F-NEXT: slli a1, a1, 1
7311 ; RV64ZVE32F-NEXT: add a0, a0, a1
7312 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
7313 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7314 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
7315 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7316 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
7317 ; RV64ZVE32F-NEXT: .LBB64_13: # %else20
7318 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7319 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7320 ; RV64ZVE32F-NEXT: ret
7321 ; RV64ZVE32F-NEXT: .LBB64_14: # %cond.load4
7322 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7323 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7324 ; RV64ZVE32F-NEXT: add a2, a0, a2
7325 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7326 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7327 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
7328 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7329 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
7330 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7331 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_6
7332 ; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load7
7333 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7334 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7335 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7336 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7337 ; RV64ZVE32F-NEXT: add a2, a0, a2
7338 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7339 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7340 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7341 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7342 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
7343 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7344 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_7
7345 ; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load10
7346 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7347 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7348 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7349 ; RV64ZVE32F-NEXT: add a2, a0, a2
7350 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7351 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7352 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7353 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7354 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
7355 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7356 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_8
7357 ; RV64ZVE32F-NEXT: j .LBB64_9
7358 %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
7359 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
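; Same as above but with the i8 indices explicitly sign-extended to i16 in the IR; the generated code matches the implicit-extension case.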
7363 define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
7364 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
7366 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7367 ; RV32-NEXT: vsext.vf4 v10, v8
7368 ; RV32-NEXT: vadd.vv v10, v10, v10
7369 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7370 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
7371 ; RV32-NEXT: vmv.v.v v8, v9
7374 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
7376 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7377 ; RV64V-NEXT: vsext.vf8 v12, v8
7378 ; RV64V-NEXT: vadd.vv v12, v12, v12
7379 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7380 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
7381 ; RV64V-NEXT: vmv.v.v v8, v9
7384 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
7385 ; RV64ZVE32F: # %bb.0:
7386 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7387 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7388 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7389 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_2
7390 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7391 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7392 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7393 ; RV64ZVE32F-NEXT: add a2, a0, a2
7394 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7395 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7396 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7397 ; RV64ZVE32F-NEXT: .LBB65_2: # %else
7398 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7399 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_4
7400 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
7401 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7402 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7403 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7404 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7405 ; RV64ZVE32F-NEXT: add a2, a0, a2
7406 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7407 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7408 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
7409 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7410 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
7411 ; RV64ZVE32F-NEXT: .LBB65_4: # %else2
7412 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7413 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7414 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7415 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7416 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
7417 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_14
7418 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
7419 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7420 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_15
7421 ; RV64ZVE32F-NEXT: .LBB65_6: # %else8
7422 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7423 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_16
7424 ; RV64ZVE32F-NEXT: .LBB65_7: # %else11
7425 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7426 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_9
7427 ; RV64ZVE32F-NEXT: .LBB65_8: # %cond.load13
7428 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7429 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
7430 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7431 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7432 ; RV64ZVE32F-NEXT: add a2, a0, a2
7433 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7434 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7435 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7436 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7437 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
7438 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14
7439 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7440 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7441 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
7442 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_11
7443 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
7444 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7445 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7446 ; RV64ZVE32F-NEXT: add a2, a0, a2
7447 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7448 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7449 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
7450 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7451 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
7452 ; RV64ZVE32F-NEXT: .LBB65_11: # %else17
7453 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7454 ; RV64ZVE32F-NEXT: beqz a1, .LBB65_13
7455 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
7456 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7457 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7458 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
7459 ; RV64ZVE32F-NEXT: slli a1, a1, 1
7460 ; RV64ZVE32F-NEXT: add a0, a0, a1
7461 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
7462 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7463 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
7464 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7465 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
7466 ; RV64ZVE32F-NEXT: .LBB65_13: # %else20
7467 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7468 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7469 ; RV64ZVE32F-NEXT: ret
7470 ; RV64ZVE32F-NEXT: .LBB65_14: # %cond.load4
7471 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7472 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7473 ; RV64ZVE32F-NEXT: add a2, a0, a2
7474 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7475 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7476 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
7477 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7478 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
7479 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7480 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_6
7481 ; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load7
7482 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7483 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7484 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7485 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7486 ; RV64ZVE32F-NEXT: add a2, a0, a2
7487 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7488 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7489 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7490 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7491 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
7492 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7493 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_7
7494 ; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load10
7495 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7496 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7497 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7498 ; RV64ZVE32F-NEXT: add a2, a0, a2
7499 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7500 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7501 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7502 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7503 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
7504 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7505 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_8
7506 ; RV64ZVE32F-NEXT: j .LBB65_9
7507 %eidxs = sext <8 x i8> %idxs to <8 x i16>
7508 %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
7509 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
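; Zero-extended i8 indices: the vector lowerings widen with vwaddu and use 16-bit offsets (vluxei16), and the scalar expansion clears the upper bits with andi 255.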
7513 define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
7514 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
7516 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
7517 ; RV32-NEXT: vwaddu.vv v10, v8, v8
7518 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7519 ; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
7520 ; RV32-NEXT: vmv.v.v v8, v9
7523 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
7525 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
7526 ; RV64V-NEXT: vwaddu.vv v10, v8, v8
7527 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7528 ; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
7529 ; RV64V-NEXT: vmv.v.v v8, v9
7532 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
7533 ; RV64ZVE32F: # %bb.0:
7534 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7535 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7536 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7537 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_2
7538 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7539 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7540 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7541 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7542 ; RV64ZVE32F-NEXT: add a2, a0, a2
7543 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7544 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7545 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7546 ; RV64ZVE32F-NEXT: .LBB66_2: # %else
7547 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7548 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_4
7549 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
7550 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7551 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7552 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7553 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7554 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7555 ; RV64ZVE32F-NEXT: add a2, a0, a2
7556 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7557 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7558 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
7559 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7560 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
7561 ; RV64ZVE32F-NEXT: .LBB66_4: # %else2
7562 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7563 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7564 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7565 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7566 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
7567 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_14
7568 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
7569 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7570 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_15
7571 ; RV64ZVE32F-NEXT: .LBB66_6: # %else8
7572 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7573 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_16
7574 ; RV64ZVE32F-NEXT: .LBB66_7: # %else11
7575 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7576 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_9
7577 ; RV64ZVE32F-NEXT: .LBB66_8: # %cond.load13
7578 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7579 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
7580 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7581 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7582 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7583 ; RV64ZVE32F-NEXT: add a2, a0, a2
7584 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7585 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7586 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7587 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7588 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
7589 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14
7590 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7591 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7592 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
7593 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_11
7594 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
7595 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7596 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7597 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7598 ; RV64ZVE32F-NEXT: add a2, a0, a2
7599 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7600 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7601 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
7602 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7603 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
7604 ; RV64ZVE32F-NEXT: .LBB66_11: # %else17
7605 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7606 ; RV64ZVE32F-NEXT: beqz a1, .LBB66_13
7607 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
7608 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7609 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7610 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
7611 ; RV64ZVE32F-NEXT: andi a1, a1, 255
7612 ; RV64ZVE32F-NEXT: slli a1, a1, 1
7613 ; RV64ZVE32F-NEXT: add a0, a0, a1
7614 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
7615 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7616 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
7617 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7618 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
7619 ; RV64ZVE32F-NEXT: .LBB66_13: # %else20
7620 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7621 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7622 ; RV64ZVE32F-NEXT: ret
7623 ; RV64ZVE32F-NEXT: .LBB66_14: # %cond.load4
7624 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7625 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7626 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7627 ; RV64ZVE32F-NEXT: add a2, a0, a2
7628 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7629 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7630 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
7631 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7632 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
7633 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7634 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_6
7635 ; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load7
7636 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7637 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7638 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7639 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7640 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7641 ; RV64ZVE32F-NEXT: add a2, a0, a2
7642 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7643 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7644 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7645 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7646 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
7647 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7648 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_7
7649 ; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load10
7650 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7651 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7652 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7653 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7654 ; RV64ZVE32F-NEXT: add a2, a0, a2
7655 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7656 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7657 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7658 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7659 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
7660 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7661 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_8
7662 ; RV64ZVE32F-NEXT: j .LBB66_9
7663 %eidxs = zext <8 x i8> %idxs to <8 x i16>
7664 %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
7665 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
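; Native i16 indices: RV32 forms 32-bit byte offsets with a widening vwadd, while RV64V sign-extends the indices to 64 bits before scaling.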
7669 define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
7670 ; RV32-LABEL: mgather_baseidx_v8bf16:
7672 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
7673 ; RV32-NEXT: vwadd.vv v10, v8, v8
7674 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
7675 ; RV32-NEXT: vmv.v.v v8, v9
7678 ; RV64V-LABEL: mgather_baseidx_v8bf16:
7680 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7681 ; RV64V-NEXT: vsext.vf4 v12, v8
7682 ; RV64V-NEXT: vadd.vv v12, v12, v12
7683 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7684 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
7685 ; RV64V-NEXT: vmv.v.v v8, v9
7688 ; RV64ZVE32F-LABEL: mgather_baseidx_v8bf16:
7689 ; RV64ZVE32F: # %bb.0:
7690 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7691 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7692 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7693 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_2
7694 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7695 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7696 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7697 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7698 ; RV64ZVE32F-NEXT: add a2, a0, a2
7699 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7700 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
7701 ; RV64ZVE32F-NEXT: .LBB67_2: # %else
7702 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7703 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_4
7704 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
7705 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7706 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7707 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7708 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7709 ; RV64ZVE32F-NEXT: add a2, a0, a2
7710 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7711 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
7712 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7713 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
7714 ; RV64ZVE32F-NEXT: .LBB67_4: # %else2
7715 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7716 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7717 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7718 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7719 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
7720 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_14
7721 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
7722 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7723 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_15
7724 ; RV64ZVE32F-NEXT: .LBB67_6: # %else8
7725 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7726 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_16
7727 ; RV64ZVE32F-NEXT: .LBB67_7: # %else11
7728 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7729 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_9
7730 ; RV64ZVE32F-NEXT: .LBB67_8: # %cond.load13
7731 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7732 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
7733 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7734 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7735 ; RV64ZVE32F-NEXT: add a2, a0, a2
7736 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7737 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7738 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7739 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
7740 ; RV64ZVE32F-NEXT: .LBB67_9: # %else14
7741 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7742 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7743 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
7744 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_11
7745 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
7746 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7747 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7748 ; RV64ZVE32F-NEXT: add a2, a0, a2
7749 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7750 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
7751 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7752 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
7753 ; RV64ZVE32F-NEXT: .LBB67_11: # %else17
7754 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7755 ; RV64ZVE32F-NEXT: beqz a1, .LBB67_13
7756 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
7757 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7758 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7759 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
7760 ; RV64ZVE32F-NEXT: slli a1, a1, 1
7761 ; RV64ZVE32F-NEXT: add a0, a0, a1
7762 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
7763 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
7764 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7765 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
7766 ; RV64ZVE32F-NEXT: .LBB67_13: # %else20
7767 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7768 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7769 ; RV64ZVE32F-NEXT: ret
7770 ; RV64ZVE32F-NEXT: .LBB67_14: # %cond.load4
7771 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7772 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7773 ; RV64ZVE32F-NEXT: add a2, a0, a2
7774 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7775 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
7776 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7777 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
7778 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7779 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_6
7780 ; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load7
7781 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7782 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7783 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7784 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7785 ; RV64ZVE32F-NEXT: add a2, a0, a2
7786 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7787 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7788 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7789 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
7790 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7791 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_7
7792 ; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load10
7793 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7794 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7795 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7796 ; RV64ZVE32F-NEXT: add a2, a0, a2
7797 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
7798 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
7799 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
7800 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7801 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_8
7802 ; RV64ZVE32F-NEXT: j .LBB67_9
7803 %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
7804 %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
7808 declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
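; Single-element f16 gather; RV64ZVE32F checks the mask with vfirst.m and performs a scalar vle16 only when the lane is active.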
7810 define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
7811 ; RV32V-LABEL: mgather_v1f16:
7813 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
7814 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
7815 ; RV32V-NEXT: vmv1r.v v8, v9
7818 ; RV64V-LABEL: mgather_v1f16:
7820 ; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
7821 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
7822 ; RV64V-NEXT: vmv1r.v v8, v9
7825 ; RV32ZVE32F-LABEL: mgather_v1f16:
7826 ; RV32ZVE32F: # %bb.0:
7827 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
7828 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
7829 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
7830 ; RV32ZVE32F-NEXT: ret
7832 ; RV64ZVE32F-LABEL: mgather_v1f16:
7833 ; RV64ZVE32F: # %bb.0:
7834 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
7835 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
7836 ; RV64ZVE32F-NEXT: bnez a1, .LBB68_2
7837 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7838 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7839 ; RV64ZVE32F-NEXT: vle16.v v8, (a0)
7840 ; RV64ZVE32F-NEXT: .LBB68_2: # %else
7841 ; RV64ZVE32F-NEXT: ret
7842 %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
7846 declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
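; Two-element f16 gather. The ZVFH expansion uses flh/vfmv.s.f, while ZVFHMIN keeps the halves in integer registers via lh/vmv.s.x.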
7848 define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
7849 ; RV32V-LABEL: mgather_v2f16:
7851 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
7852 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
7853 ; RV32V-NEXT: vmv1r.v v8, v9
7856 ; RV64V-LABEL: mgather_v2f16:
7858 ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
7859 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
7860 ; RV64V-NEXT: vmv1r.v v8, v9
7863 ; RV32ZVE32F-LABEL: mgather_v2f16:
7864 ; RV32ZVE32F: # %bb.0:
7865 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
7866 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
7867 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
7868 ; RV32ZVE32F-NEXT: ret
7870 ; RV64ZVE32F-ZVFH-LABEL: mgather_v2f16:
7871 ; RV64ZVE32F-ZVFH: # %bb.0:
7872 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7873 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
7874 ; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
7875 ; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB69_3
7876 ; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
7877 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
7878 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_4
7879 ; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else2
7880 ; RV64ZVE32F-ZVFH-NEXT: ret
7881 ; RV64ZVE32F-ZVFH-NEXT: .LBB69_3: # %cond.load
7882 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
7883 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7884 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
7885 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
7886 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2
7887 ; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %cond.load1
7888 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1)
7889 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7890 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
7891 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7892 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
7893 ; RV64ZVE32F-ZVFH-NEXT: ret
7895 ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v2f16:
7896 ; RV64ZVE32F-ZVFHMIN: # %bb.0:
7897 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7898 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
7899 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
7900 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB69_3
7901 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
7902 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
7903 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_4
7904 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2
7905 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
7906 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
7907 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
7908 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7909 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
7910 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
7911 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2
7912 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
7913 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a1)
7914 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7915 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
7916 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7917 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
7918 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
7919 %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
7923 declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
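; Four-element f16 gather, scalarized per mask bit on RV64ZVE32F; ZVFH and ZVFHMIN differ only in using FP versus integer element moves.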
7925 define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
7926 ; RV32-LABEL: mgather_v4f16:
7928 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
7929 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
7930 ; RV32-NEXT: vmv1r.v v8, v9
7933 ; RV64V-LABEL: mgather_v4f16:
7935 ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
7936 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
7937 ; RV64V-NEXT: vmv1r.v v8, v10
7940 ; RV64ZVE32F-ZVFH-LABEL: mgather_v4f16:
7941 ; RV64ZVE32F-ZVFH: # %bb.0:
7942 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7943 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
7944 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
7945 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_5
7946 ; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
7947 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
7948 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_6
7949 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else2
7950 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
7951 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_7
7952 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_3: # %else5
7953 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8
7954 ; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_8
7955 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else8
7956 ; RV64ZVE32F-ZVFH-NEXT: ret
7957 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_5: # %cond.load
7958 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0)
7959 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
7960 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
7961 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
7962 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
7963 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2
7964 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %cond.load1
7965 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
7966 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
7967 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7968 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
7969 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
7970 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
7971 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
7972 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_3
7973 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %cond.load4
7974 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
7975 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
7976 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
7977 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
7978 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2
7979 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8
7980 ; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_4
7981 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.load7
7982 ; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0)
7983 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
7984 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7985 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
7986 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3
7987 ; RV64ZVE32F-ZVFH-NEXT: ret
7989 ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v4f16:
7990 ; RV64ZVE32F-ZVFHMIN: # %bb.0:
7991 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7992 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
7993 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
7994 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_5
7995 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
7996 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
7997 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_6
7998 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2
7999 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
8000 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_7
8001 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else5
8002 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8
8003 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_8
8004 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else8
8005 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
8006 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
8007 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0)
8008 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8009 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
8010 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
8011 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
8012 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2
8013 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
8014 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
8015 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8016 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
8017 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8018 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
8019 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
8020 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
8021 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_3
8022 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
8023 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
8024 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8025 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
8026 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8027 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2
8028 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8
8029 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_4
8030 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
8031 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0)
8032 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
8033 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
8034 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
8035 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
8036 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
8037 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
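; All-ones mask: four unconditional scalar loads, assembled with slide1down (FP form under ZVFH, integer form under ZVFHMIN).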
8041 define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
8042 ; RV32-LABEL: mgather_truemask_v4f16:
8044 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
8045 ; RV32-NEXT: vluxei32.v v9, (zero), v8
8046 ; RV32-NEXT: vmv1r.v v8, v9
8049 ; RV64V-LABEL: mgather_truemask_v4f16:
8051 ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
8052 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
8053 ; RV64V-NEXT: vmv1r.v v8, v10
8056 ; RV64ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16:
8057 ; RV64ZVE32F-ZVFH: # %bb.0:
8058 ; RV64ZVE32F-ZVFH-NEXT: ld a1, 0(a0)
8059 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
8060 ; RV64ZVE32F-ZVFH-NEXT: ld a3, 16(a0)
8061 ; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0)
8062 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1)
8063 ; RV64ZVE32F-ZVFH-NEXT: flh fa4, 0(a2)
8064 ; RV64ZVE32F-ZVFH-NEXT: flh fa3, 0(a3)
8065 ; RV64ZVE32F-ZVFH-NEXT: flh fa2, 0(a0)
8066 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
8067 ; RV64ZVE32F-ZVFH-NEXT: vfmv.v.f v8, fa5
8068 ; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa4
8069 ; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa3
8070 ; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa2
8071 ; RV64ZVE32F-ZVFH-NEXT: ret
8073 ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_truemask_v4f16:
8074 ; RV64ZVE32F-ZVFHMIN: # %bb.0:
8075 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0)
8076 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
8077 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 16(a0)
8078 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0)
8079 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
8080 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8081 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a3, 0(a3)
8082 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
8083 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
8084 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.v.x v8, a1
8085 ; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
8086 ; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3
8087 ; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
8088 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
8089 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru)
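; All-zeros mask: the passthru operand is returned directly.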
8093 define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
8094 ; RV32-LABEL: mgather_falsemask_v4f16:
8096 ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8097 ; RV32-NEXT: vmv1r.v v8, v9
8100 ; RV64V-LABEL: mgather_falsemask_v4f16:
8102 ; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8103 ; RV64V-NEXT: vmv1r.v v8, v10
8106 ; RV64ZVE32F-LABEL: mgather_falsemask_v4f16:
8107 ; RV64ZVE32F: # %bb.0:
8108 ; RV64ZVE32F-NEXT: ret
8109 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru)
8113 declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
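; Masked gather of 8 x f16; one conditional scalar load per mask bit on RV64ZVE32F.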
8115 define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
8116 ; RV32-LABEL: mgather_v8f16:
8118 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
8119 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
8120 ; RV32-NEXT: vmv.v.v v8, v10
8123 ; RV64V-LABEL: mgather_v8f16:
8125 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
8126 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
8127 ; RV64V-NEXT: vmv.v.v v8, v12
8130 ; RV64ZVE32F-ZVFH-LABEL: mgather_v8f16:
8131 ; RV64ZVE32F-ZVFH: # %bb.0:
8132 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8133 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
8134 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
8135 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_9
8136 ; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
8137 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
8138 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_10
8139 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_2: # %else2
8140 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
8141 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_11
8142 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_3: # %else5
8143 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8144 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_12
8145 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_4: # %else8
8146 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
8147 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_13
8148 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_5: # %else11
8149 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
8150 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_14
8151 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_6: # %else14
8152 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
8153 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_15
8154 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_7: # %else17
8155 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
8156 ; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB73_16
8157 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_8: # %else20
8158 ; RV64ZVE32F-ZVFH-NEXT: ret
8159 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_9: # %cond.load
8160 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0)
8161 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8162 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
8163 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8164 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
8165 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_2
8166 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_10: # %cond.load1
8167 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
8168 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8169 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
8170 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8171 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
8172 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
8173 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
8174 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_3
8175 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_11: # %cond.load4
8176 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
8177 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8178 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
8179 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8180 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2
8181 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8182 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_4
8183 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_12: # %cond.load7
8184 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 24(a0)
8185 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8186 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
8187 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8188 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3
8189 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
8190 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_5
8191 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_13: # %cond.load10
8192 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 32(a0)
8193 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8194 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
8195 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8196 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 4
8197 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
8198 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_6
8199 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_14: # %cond.load13
8200 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 40(a0)
8201 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8202 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
8203 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8204 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 5
8205 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
8206 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_7
8207 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_15: # %cond.load16
8208 ; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0)
8209 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8210 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
8211 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8212 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 6
8213 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
8214 ; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB73_8
8215 ; RV64ZVE32F-ZVFH-NEXT: .LBB73_16: # %cond.load19
8216 ; RV64ZVE32F-ZVFH-NEXT: ld a0, 56(a0)
8217 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
8218 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8219 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8220 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 7
8221 ; RV64ZVE32F-ZVFH-NEXT: ret
8223 ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v8f16:
8224 ; RV64ZVE32F-ZVFHMIN: # %bb.0:
8225 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8226 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
8227 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
8228 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_9
8229 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
8230 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
8231 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_10
8232 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_2: # %else2
8233 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
8234 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_11
8235 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_3: # %else5
8236 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
8237 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_12
8238 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_4: # %else8
8239 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
8240 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_13
8241 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_5: # %else11
8242 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
8243 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_14
8244 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_6: # %else14
8245 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
8246 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_15
8247 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_7: # %else17
8248 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
8249 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_16
8250 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_8: # %else20
8251 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
8252 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_9: # %cond.load
8253 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0)
8254 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8255 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
8256 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
8257 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
8258 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_2
8259 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1
8260 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
8261 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8262 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
8263 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8264 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
8265 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
8266 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
8267 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_3
8268 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4
8269 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
8270 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8271 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
8272 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8273 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2
8274 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
8275 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_4
8276 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7
8277 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 24(a0)
8278 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8279 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
8280 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8281 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
8282 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
8283 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_5
8284 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10
8285 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 32(a0)
8286 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8287 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
8288 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8289 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 4
8290 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
8291 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_6
8292 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13
8293 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 40(a0)
8294 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8295 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
8296 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8297 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 5
8298 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
8299 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_7
8300 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16
8301 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0)
8302 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8303 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
8304 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8305 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 6
8306 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
8307 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_8
8308 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19
8309 ; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 56(a0)
8310 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
8311 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8312 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
8313 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 7
8314 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
8315 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
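; f16 gather with i8 indices off a common base; the indices are sign-extended and scaled by the 2-byte element size, mirroring the bfloat version above.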
8319 define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
8320 ; RV32-LABEL: mgather_baseidx_v8i8_v8f16:
8322 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8323 ; RV32-NEXT: vsext.vf4 v10, v8
8324 ; RV32-NEXT: vadd.vv v10, v10, v10
8325 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
8326 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
8327 ; RV32-NEXT: vmv.v.v v8, v9
8330 ; RV64V-LABEL: mgather_baseidx_v8i8_v8f16:
8332 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8333 ; RV64V-NEXT: vsext.vf8 v12, v8
8334 ; RV64V-NEXT: vadd.vv v12, v12, v12
8335 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
8336 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
8337 ; RV64V-NEXT: vmv.v.v v8, v9
8340 ; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
8341 ; RV64ZVE32F-ZVFH: # %bb.0:
8342 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8343 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
8344 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
8345 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_2
8346 ; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load
8347 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8348 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8349 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8350 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8351 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
8352 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8353 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_2: # %else
8354 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
8355 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_4
8356 ; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1
8357 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8358 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
8359 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
8360 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8361 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8362 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8363 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8364 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
8365 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
8366 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1
8367 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_4: # %else2
8368 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8369 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
8370 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
8371 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8372 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
8373 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14
8374 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
8375 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8376 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_15
8377 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_6: # %else8
8378 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
8379 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_16
8380 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_7: # %else11
8381 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
8382 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_9
8383 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_8: # %cond.load13
8384 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8385 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
8386 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8387 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8388 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8389 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8390 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8391 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8392 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
8393 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
8394 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_9: # %else14
8395 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
8396 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8397 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
8398 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_11
8399 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16
8400 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8401 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8402 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8403 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8404 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8405 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
8406 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
8407 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
8408 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_11: # %else17
8409 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
8410 ; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB74_13
8411 ; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19
8412 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8413 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
8414 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
8415 ; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
8416 ; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
8417 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
8418 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8419 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8420 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8421 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
8422 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_13: # %else20
8423 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8424 ; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
8425 ; RV64ZVE32F-ZVFH-NEXT: ret
8426 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_14: # %cond.load4
8427 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8428 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8429 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8430 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8431 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8432 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
8433 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
8434 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
8435 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8436 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_6
8437 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_15: # %cond.load7
8438 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8439 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
8440 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8441 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8442 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8443 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8444 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8445 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8446 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
8447 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
8448 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
8449 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_7
8450 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_16: # %cond.load10
8451 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8452 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
8453 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8454 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8455 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8456 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
8457 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8458 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
8459 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
8460 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
8461 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_8
8462 ; RV64ZVE32F-ZVFH-NEXT: j .LBB74_9
8464 ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
8465 ; RV64ZVE32F-ZVFHMIN: # %bb.0:
8466 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8467 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
8468 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
8469 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_2
8470 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
8471 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8472 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8473 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8474 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8475 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
8476 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8477 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_2: # %else
8478 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
8479 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_4
8480 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1
8481 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8482 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
8483 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
8484 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8485 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8486 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8487 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8488 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
8489 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
8490 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
8491 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_4: # %else2
8492 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8493 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
8494 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
8495 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8496 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
8497 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14
8498 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
8499 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
8500 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_15
8501 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else8
8502 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
8503 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_16
8504 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else11
8505 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
8506 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_9
8507 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_8: # %cond.load13
8508 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8509 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
8510 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8511 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8512 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8513 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8514 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8515 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
8516 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
8517 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
8518 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_9: # %else14
8519 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
8520 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8521 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
8522 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_11
8523 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16
8524 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8525 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8526 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8527 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8528 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8529 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
8530 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
8531 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
8532 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_11: # %else17
8533 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
8534 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB74_13
8535 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19
8536 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8537 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
8538 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
8539 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
8540 ; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
8541 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
8542 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8543 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
8544 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8545 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
8546 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_13: # %else20
8547 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8548 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
8549 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
8550 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load4
8551 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8552 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8553 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8554 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8555 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8556 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
8557 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
8558 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
8559 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
8560 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_6
8561 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load7
8562 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8563 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
8564 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8565 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8566 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8567 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8568 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8569 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
8570 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
8571 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
8572 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
8573 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_7
8574 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load10
8575 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8576 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
8577 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8578 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8579 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8580 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
8581 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
8582 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
8583 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
8584 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
8585 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_8
8586 ; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_9
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
  ret <8 x half> %v
}

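; Same gather with the i8 indices explicitly sign extended to i16 first; this lowers the same way as the plain i8-index case above.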
define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
8593 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16:
8595 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8596 ; RV32-NEXT: vsext.vf4 v10, v8
8597 ; RV32-NEXT: vadd.vv v10, v10, v10
8598 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
8599 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
8600 ; RV32-NEXT: vmv.v.v v8, v9
8603 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16:
8605 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8606 ; RV64V-NEXT: vsext.vf8 v12, v8
8607 ; RV64V-NEXT: vadd.vv v12, v12, v12
8608 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
8609 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
8610 ; RV64V-NEXT: vmv.v.v v8, v9
8613 ; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
8614 ; RV64ZVE32F-ZVFH: # %bb.0:
8615 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8616 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
8617 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
8618 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_2
8619 ; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load
8620 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8621 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8622 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8623 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8624 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
8625 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8626 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_2: # %else
8627 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
8628 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_4
8629 ; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1
8630 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8631 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
8632 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
8633 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8634 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8635 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8636 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8637 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
8638 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
8639 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1
8640 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_4: # %else2
8641 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8642 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
8643 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
8644 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8645 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
8646 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14
8647 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
8648 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8649 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_15
8650 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_6: # %else8
8651 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
8652 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_16
8653 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_7: # %else11
8654 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
8655 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_9
8656 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_8: # %cond.load13
8657 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8658 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
8659 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8660 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8661 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8662 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8663 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8664 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8665 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
8666 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
8667 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_9: # %else14
8668 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
8669 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8670 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
8671 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_11
8672 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16
8673 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8674 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8675 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8676 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8677 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8678 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
8679 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
8680 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
8681 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_11: # %else17
8682 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
8683 ; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB75_13
8684 ; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19
8685 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8686 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
8687 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
8688 ; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
8689 ; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
8690 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
8691 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8692 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8693 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8694 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
8695 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_13: # %else20
8696 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8697 ; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
8698 ; RV64ZVE32F-ZVFH-NEXT: ret
8699 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_14: # %cond.load4
8700 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8701 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8702 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8703 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8704 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8705 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
8706 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
8707 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
8708 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8709 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_6
8710 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_15: # %cond.load7
8711 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8712 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
8713 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8714 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8715 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8716 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8717 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8718 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8719 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
8720 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
8721 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
8722 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_7
8723 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_16: # %cond.load10
8724 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8725 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
8726 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8727 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8728 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8729 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
8730 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8731 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
8732 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
8733 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
8734 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_8
8735 ; RV64ZVE32F-ZVFH-NEXT: j .LBB75_9
8737 ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
8738 ; RV64ZVE32F-ZVFHMIN: # %bb.0:
8739 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8740 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
8741 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
8742 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_2
8743 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
8744 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8745 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8746 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8747 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8748 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
8749 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
8750 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_2: # %else
8751 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
8752 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_4
8753 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1
8754 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8755 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
8756 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
8757 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8758 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8759 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8760 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8761 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
8762 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
8763 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
8764 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_4: # %else2
8765 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8766 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
8767 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
8768 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8769 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
8770 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14
8771 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
8772 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
8773 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_15
8774 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else8
8775 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
8776 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_16
8777 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else11
8778 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
8779 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_9
8780 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_8: # %cond.load13
8781 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8782 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
8783 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8784 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8785 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8786 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8787 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8788 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
8789 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
8790 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
8791 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_9: # %else14
8792 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
8793 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8794 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
8795 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_11
8796 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16
8797 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8798 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8799 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8800 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8801 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8802 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
8803 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
8804 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
8805 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_11: # %else17
8806 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
8807 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB75_13
8808 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19
8809 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8810 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
8811 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
8812 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
8813 ; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
8814 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
8815 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8816 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
8817 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8818 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
8819 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_13: # %else20
8820 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8821 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
8822 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
8823 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load4
8824 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8825 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8826 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8827 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8828 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8829 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
8830 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
8831 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
8832 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
8833 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_6
8834 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load7
8835 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8836 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
8837 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
8838 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8839 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8840 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8841 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8842 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
8843 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
8844 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
8845 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
8846 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_7
8847 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load10
8848 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8849 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
8850 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
8851 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
8852 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
8853 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
8854 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
8855 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
8856 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
8857 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
8858 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_8
8859 ; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_9
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
  ret <8 x half> %v
}

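; i8 indices zero extended to i16: the vector lowerings widen and double the indices in a single vwaddu.vv, while the scalar path masks each index with 255 before scaling.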
define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
8867 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16:
8869 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
8870 ; RV32-NEXT: vwaddu.vv v10, v8, v8
8871 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
8872 ; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
8873 ; RV32-NEXT: vmv.v.v v8, v9
8876 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16:
8878 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
8879 ; RV64V-NEXT: vwaddu.vv v10, v8, v8
8880 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
8881 ; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
8882 ; RV64V-NEXT: vmv.v.v v8, v9
8885 ; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
8886 ; RV64ZVE32F-ZVFH: # %bb.0:
8887 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8888 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
8889 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
8890 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_2
8891 ; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load
8892 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8893 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8894 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8895 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8896 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8897 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
8898 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
8899 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_2: # %else
8900 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
8901 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_4
8902 ; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1
8903 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8904 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
8905 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
8906 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8907 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8908 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8909 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8910 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8911 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
8912 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
8913 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1
8914 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_4: # %else2
8915 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8916 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
8917 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
8918 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8919 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
8920 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14
8921 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
8922 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8923 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_15
8924 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_6: # %else8
8925 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
8926 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_16
8927 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_7: # %else11
8928 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
8929 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_9
8930 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_8: # %cond.load13
8931 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8932 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
8933 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8934 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8935 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8936 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8937 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8938 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8939 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8940 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
8941 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
8942 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_9: # %else14
8943 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
8944 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8945 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
8946 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_11
8947 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16
8948 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8949 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8950 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8951 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8952 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8953 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8954 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
8955 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
8956 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
8957 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_11: # %else17
8958 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
8959 ; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB76_13
8960 ; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19
8961 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8962 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
8963 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
8964 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255
8965 ; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
8966 ; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
8967 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
8968 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8969 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8970 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8971 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
8972 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_13: # %else20
8973 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8974 ; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
8975 ; RV64ZVE32F-ZVFH-NEXT: ret
8976 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_14: # %cond.load4
8977 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8978 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8979 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8980 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8981 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8982 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8983 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
8984 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
8985 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
8986 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8987 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_6
8988 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_15: # %cond.load7
8989 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8990 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
8991 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
8992 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8993 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8994 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8995 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
8996 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8997 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
8998 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
8999 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
9000 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
9001 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_7
9002 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %cond.load10
9003 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9004 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
9005 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
9006 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
9007 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
9008 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
9009 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
9010 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
9011 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
9012 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
9013 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
9014 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_8
9015 ; RV64ZVE32F-ZVFH-NEXT: j .LBB76_9
9017 ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
9018 ; RV64ZVE32F-ZVFHMIN: # %bb.0:
9019 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9020 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
9021 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
9022 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_2
9023 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
9024 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9025 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
9026 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9027 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9028 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9029 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
9030 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
9031 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_2: # %else
9032 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
9033 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_4
9034 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1
9035 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9036 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
9037 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
9038 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
9039 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9040 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9041 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9042 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9043 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
9044 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
9045 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
9046 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_4: # %else2
9047 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
9048 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
9049 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
9050 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
9051 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
9052 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14
9053 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
9054 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
9055 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_15
9056 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else8
9057 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
9058 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_16
9059 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else11
9060 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
9061 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_9
9062 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_8: # %cond.load13
9063 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9064 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
9065 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9066 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
9067 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9068 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9069 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9070 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9071 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
9072 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
9073 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
9074 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_9: # %else14
9075 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
9076 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
9077 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
9078 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_11
9079 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16
9080 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9081 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
9082 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9083 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9084 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9085 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9086 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
9087 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
9088 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
9089 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %else17
9090 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
9091 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB76_13
9092 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19
9093 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9094 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
9095 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
9096 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255
9097 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
9098 ; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
9099 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
9100 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9101 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
9102 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
9103 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
9104 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %else20
9105 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9106 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
9107 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
9108 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load4
9109 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9110 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
9111 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9112 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9113 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9114 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9115 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
9116 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
9117 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
9118 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
9119 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_6
9120 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load7
9121 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9122 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
9123 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9124 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
9125 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9126 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9127 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9128 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9129 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
9130 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
9131 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
9132 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
9133 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_7
9134 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load10
9135 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9136 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
9137 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
9138 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9139 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9140 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9141 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
9142 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
9143 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
9144 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
9145 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
9146 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_8
9147 ; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_9
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
  ret <8 x half> %v
}

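; <8 x half> gather with natively sized i16 indices; RV32 folds the doubling into a widening vwadd.vv.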
define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) {
9155 ; RV32-LABEL: mgather_baseidx_v8f16:
9157 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
9158 ; RV32-NEXT: vwadd.vv v10, v8, v8
9159 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
9160 ; RV32-NEXT: vmv.v.v v8, v9
9163 ; RV64V-LABEL: mgather_baseidx_v8f16:
9165 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9166 ; RV64V-NEXT: vsext.vf4 v12, v8
9167 ; RV64V-NEXT: vadd.vv v12, v12, v12
9168 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
9169 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
9170 ; RV64V-NEXT: vmv.v.v v8, v9
9173 ; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8f16:
9174 ; RV64ZVE32F-ZVFH: # %bb.0:
9175 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9176 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
9177 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
9178 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_2
9179 ; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load
9180 ; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
9181 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
9182 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
9183 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
9184 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
9185 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
9186 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_2: # %else
9187 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
9188 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_4
9189 ; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1
9190 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9191 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
9192 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
9193 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
9194 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
9195 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
9196 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
9197 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
9198 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1
9199 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_4: # %else2
9200 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9201 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
9202 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
9203 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9204 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
9205 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14
9206 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
9207 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
9208 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_15
9209 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_6: # %else8
9210 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
9211 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_16
9212 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_7: # %else11
9213 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
9214 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_9
9215 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_8: # %cond.load13
9216 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9217 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
9218 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
9219 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
9220 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
9221 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
9222 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
9223 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
9224 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
9225 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_9: # %else14
9226 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
9227 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9228 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
9229 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_11
9230 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16
9231 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
9232 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
9233 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
9234 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
9235 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
9236 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
9237 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
9238 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_11: # %else17
9239 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
9240 ; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB77_13
9241 ; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19
9242 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9243 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
9244 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
9245 ; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
9246 ; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
9247 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
9248 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
9249 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
9250 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
9251 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_13: # %else20
9252 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9253 ; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
9254 ; RV64ZVE32F-ZVFH-NEXT: ret
9255 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_14: # %cond.load4
9256 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
9257 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
9258 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
9259 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
9260 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
9261 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
9262 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
9263 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
9264 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_6
9265 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_15: # %cond.load7
9266 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9267 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
9268 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
9269 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
9270 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
9271 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
9272 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
9273 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
9274 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
9275 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
9276 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_7
9277 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_16: # %cond.load10
9278 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
9279 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
9280 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
9281 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
9282 ; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
9283 ; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
9284 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
9285 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
9286 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_8
9287 ; RV64ZVE32F-ZVFH-NEXT: j .LBB77_9
9289 ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
9290 ; RV64ZVE32F-ZVFHMIN: # %bb.0:
9291 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9292 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
9293 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
9294 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_2
9295 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
9296 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
9297 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9298 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9299 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9300 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9301 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
9302 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_2: # %else
9303 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
9304 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_4
9305 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1
9306 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9307 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
9308 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
9309 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9310 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9311 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9312 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
9313 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
9314 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
9315 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_4: # %else2
9316 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9317 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
9318 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
9319 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9320 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
9321 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14
9322 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
9323 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
9324 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_15
9325 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else8
9326 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
9327 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_16
9328 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else11
9329 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
9330 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_9
9331 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_8: # %cond.load13
9332 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9333 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
9334 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9335 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9336 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9337 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9338 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
9339 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
9340 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
9341 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_9: # %else14
9342 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
9343 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9344 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
9345 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_11
9346 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16
9347 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9348 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9349 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9350 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9351 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
9352 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
9353 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
9354 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_11: # %else17
9355 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
9356 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB77_13
9357 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19
9358 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9359 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
9360 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
9361 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
9362 ; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
9363 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
9364 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
9365 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
9366 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
9367 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_13: # %else20
9368 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9369 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
9370 ; RV64ZVE32F-ZVFHMIN-NEXT: ret
9371 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load4
9372 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9373 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9374 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9375 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9376 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
9377 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
9378 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
9379 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
9380 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_6
9381 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load7
9382 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9383 ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
9384 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
9385 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9386 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9387 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9388 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
9389 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
9390 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
9391 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
9392 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_7
9393 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load10
9394 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
9395 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
9396 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
9397 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
9398 ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
9399 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
9400 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
9401 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
9402 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_8
9403 ; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_9
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
  %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
  ret <8 x half> %v
}

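; f32 gathers. The single-element case only needs a vfirst.m test on the mask and an ordinary unit-stride load.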
declare <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x float>)

define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %passthru) {
9412 ; RV32V-LABEL: mgather_v1f32:
9414 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
9415 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
9416 ; RV32V-NEXT: vmv1r.v v8, v9
9419 ; RV64V-LABEL: mgather_v1f32:
9421 ; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
9422 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
9423 ; RV64V-NEXT: vmv1r.v v8, v9
9426 ; RV32ZVE32F-LABEL: mgather_v1f32:
9427 ; RV32ZVE32F: # %bb.0:
9428 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
9429 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
9430 ; RV32ZVE32F-NEXT: vmv.v.v v8, v9
9431 ; RV32ZVE32F-NEXT: ret
9433 ; RV64ZVE32F-LABEL: mgather_v1f32:
9434 ; RV64ZVE32F: # %bb.0:
9435 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
9436 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
9437 ; RV64ZVE32F-NEXT: bnez a1, .LBB78_2
9438 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
9439 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9440 ; RV64ZVE32F-NEXT: vle32.v v8, (a0)
9441 ; RV64ZVE32F-NEXT: .LBB78_2: # %else
9442 ; RV64ZVE32F-NEXT: ret
  %v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)
  ret <1 x float> %v
}

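; Two-element f32 gather: RV64ZVE32F receives the pointers in a0/a1 and expands the gather into scalar loads guarded by the mask bits.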
declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)

define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %passthru) {
9450 ; RV32V-LABEL: mgather_v2f32:
9452 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
9453 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
9454 ; RV32V-NEXT: vmv1r.v v8, v9
9457 ; RV64V-LABEL: mgather_v2f32:
9459 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
9460 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
9461 ; RV64V-NEXT: vmv1r.v v8, v9
9464 ; RV32ZVE32F-LABEL: mgather_v2f32:
9465 ; RV32ZVE32F: # %bb.0:
9466 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
9467 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
9468 ; RV32ZVE32F-NEXT: vmv.v.v v8, v9
9469 ; RV32ZVE32F-NEXT: ret
9471 ; RV64ZVE32F-LABEL: mgather_v2f32:
9472 ; RV64ZVE32F: # %bb.0:
9473 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9474 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
9475 ; RV64ZVE32F-NEXT: andi a3, a2, 1
9476 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_3
9477 ; RV64ZVE32F-NEXT: # %bb.1: # %else
9478 ; RV64ZVE32F-NEXT: andi a2, a2, 2
9479 ; RV64ZVE32F-NEXT: bnez a2, .LBB79_4
9480 ; RV64ZVE32F-NEXT: .LBB79_2: # %else2
9481 ; RV64ZVE32F-NEXT: ret
9482 ; RV64ZVE32F-NEXT: .LBB79_3: # %cond.load
9483 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
9484 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
9485 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9486 ; RV64ZVE32F-NEXT: andi a2, a2, 2
9487 ; RV64ZVE32F-NEXT: beqz a2, .LBB79_2
9488 ; RV64ZVE32F-NEXT: .LBB79_4: # %cond.load1
9489 ; RV64ZVE32F-NEXT: flw fa5, 0(a1)
9490 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
9491 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
9492 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
9493 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
9494 ; RV64ZVE32F-NEXT: ret
  %v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru)
  ret <2 x float> %v
}

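; Four-element f32 gather: RV64ZVE32F tests each mask bit and branches to a per-element scalar load.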
declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)

define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %passthru) {
9502 ; RV32-LABEL: mgather_v4f32:
9504 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
9505 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
9506 ; RV32-NEXT: vmv.v.v v8, v9
9509 ; RV64V-LABEL: mgather_v4f32:
9511 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu
9512 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
9513 ; RV64V-NEXT: vmv.v.v v8, v10
9516 ; RV64ZVE32F-LABEL: mgather_v4f32:
9517 ; RV64ZVE32F: # %bb.0:
9518 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9519 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9520 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9521 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_5
9522 ; RV64ZVE32F-NEXT: # %bb.1: # %else
9523 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9524 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_6
9525 ; RV64ZVE32F-NEXT: .LBB80_2: # %else2
9526 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9527 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_7
9528 ; RV64ZVE32F-NEXT: .LBB80_3: # %else5
9529 ; RV64ZVE32F-NEXT: andi a1, a1, 8
9530 ; RV64ZVE32F-NEXT: bnez a1, .LBB80_8
9531 ; RV64ZVE32F-NEXT: .LBB80_4: # %else8
9532 ; RV64ZVE32F-NEXT: ret
9533 ; RV64ZVE32F-NEXT: .LBB80_5: # %cond.load
9534 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
9535 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9536 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
9537 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9538 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9539 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
9540 ; RV64ZVE32F-NEXT: .LBB80_6: # %cond.load1
9541 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
9542 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9543 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
9544 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
9545 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
9546 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
9547 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9548 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_3
9549 ; RV64ZVE32F-NEXT: .LBB80_7: # %cond.load4
9550 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
9551 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9552 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
9553 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
9554 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
9555 ; RV64ZVE32F-NEXT: andi a1, a1, 8
9556 ; RV64ZVE32F-NEXT: beqz a1, .LBB80_4
9557 ; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load7
9558 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
9559 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
9560 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
9561 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
9562 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
9563 ; RV64ZVE32F-NEXT: ret
9564 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x float> %passthru)
9565 ret <4 x float> %v
9566 }
9568 define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
9569 ; RV32-LABEL: mgather_truemask_v4f32:
9571 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
9572 ; RV32-NEXT: vluxei32.v v8, (zero), v8
9575 ; RV64V-LABEL: mgather_truemask_v4f32:
9577 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
9578 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
9579 ; RV64V-NEXT: vmv.v.v v8, v10
9582 ; RV64ZVE32F-LABEL: mgather_truemask_v4f32:
9583 ; RV64ZVE32F: # %bb.0:
9584 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
9585 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
9586 ; RV64ZVE32F-NEXT: ld a3, 16(a0)
9587 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
9588 ; RV64ZVE32F-NEXT: flw fa5, 0(a1)
9589 ; RV64ZVE32F-NEXT: flw fa4, 0(a2)
9590 ; RV64ZVE32F-NEXT: flw fa3, 0(a3)
9591 ; RV64ZVE32F-NEXT: flw fa2, 0(a0)
9592 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
9593 ; RV64ZVE32F-NEXT: vfmv.v.f v8, fa5
9594 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4
9595 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3
9596 ; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa2
9597 ; RV64ZVE32F-NEXT: ret
9598 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x float> %passthru)
9599 ret <4 x float> %v
9600 }
9602 define <4 x float> @mgather_falsemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
9603 ; RV32-LABEL: mgather_falsemask_v4f32:
9605 ; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9606 ; RV32-NEXT: vmv1r.v v8, v9
9609 ; RV64V-LABEL: mgather_falsemask_v4f32:
9611 ; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9612 ; RV64V-NEXT: vmv1r.v v8, v10
9615 ; RV64ZVE32F-LABEL: mgather_falsemask_v4f32:
9616 ; RV64ZVE32F: # %bb.0:
9617 ; RV64ZVE32F-NEXT: ret
9618 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x float> %passthru)
9619 ret <4 x float> %v
9620 }
9622 declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
9624 define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %passthru) {
9625 ; RV32-LABEL: mgather_v8f32:
9627 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
9628 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
9629 ; RV32-NEXT: vmv.v.v v8, v10
9632 ; RV64V-LABEL: mgather_v8f32:
9634 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
9635 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
9636 ; RV64V-NEXT: vmv.v.v v8, v12
9639 ; RV64ZVE32F-LABEL: mgather_v8f32:
9640 ; RV64ZVE32F: # %bb.0:
9641 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9642 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9643 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9644 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_9
9645 ; RV64ZVE32F-NEXT: # %bb.1: # %else
9646 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9647 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_10
9648 ; RV64ZVE32F-NEXT: .LBB83_2: # %else2
9649 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9650 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_11
9651 ; RV64ZVE32F-NEXT: .LBB83_3: # %else5
9652 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9653 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
9654 ; RV64ZVE32F-NEXT: .LBB83_4: # %else8
9655 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9656 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
9657 ; RV64ZVE32F-NEXT: .LBB83_5: # %else11
9658 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9659 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
9660 ; RV64ZVE32F-NEXT: .LBB83_6: # %else14
9661 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9662 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
9663 ; RV64ZVE32F-NEXT: .LBB83_7: # %else17
9664 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9665 ; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
9666 ; RV64ZVE32F-NEXT: .LBB83_8: # %else20
9667 ; RV64ZVE32F-NEXT: ret
9668 ; RV64ZVE32F-NEXT: .LBB83_9: # %cond.load
9669 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
9670 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9671 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
9672 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9673 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9674 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
9675 ; RV64ZVE32F-NEXT: .LBB83_10: # %cond.load1
9676 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
9677 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9678 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
9679 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9680 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
9681 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
9682 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9683 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_3
9684 ; RV64ZVE32F-NEXT: .LBB83_11: # %cond.load4
9685 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
9686 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9687 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
9688 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9689 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
9690 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9691 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
9692 ; RV64ZVE32F-NEXT: .LBB83_12: # %cond.load7
9693 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
9694 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9695 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
9696 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9697 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
9698 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9699 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_5
9700 ; RV64ZVE32F-NEXT: .LBB83_13: # %cond.load10
9701 ; RV64ZVE32F-NEXT: ld a2, 32(a0)
9702 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9703 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
9704 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9705 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
9706 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9707 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
9708 ; RV64ZVE32F-NEXT: .LBB83_14: # %cond.load13
9709 ; RV64ZVE32F-NEXT: ld a2, 40(a0)
9710 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9711 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
9712 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9713 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
9714 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9715 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
9716 ; RV64ZVE32F-NEXT: .LBB83_15: # %cond.load16
9717 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
9718 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9719 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
9720 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9721 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
9722 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9723 ; RV64ZVE32F-NEXT: beqz a1, .LBB83_8
9724 ; RV64ZVE32F-NEXT: .LBB83_16: # %cond.load19
9725 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
9726 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
9727 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9728 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9729 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
9730 ; RV64ZVE32F-NEXT: ret
9731 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
9732 ret <8 x float> %v
9733 }
9735 define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
9736 ; RV32-LABEL: mgather_baseidx_v8i8_v8f32:
9738 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
9739 ; RV32-NEXT: vsext.vf4 v12, v8
9740 ; RV32-NEXT: vsll.vi v8, v12, 2
9741 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
9742 ; RV32-NEXT: vmv.v.v v8, v10
9745 ; RV64V-LABEL: mgather_baseidx_v8i8_v8f32:
9747 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9748 ; RV64V-NEXT: vsext.vf8 v12, v8
9749 ; RV64V-NEXT: vsll.vi v12, v12, 2
9750 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
9751 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
9752 ; RV64V-NEXT: vmv.v.v v8, v10
9755 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f32:
9756 ; RV64ZVE32F: # %bb.0:
9757 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9758 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9759 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9760 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_2
9761 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
9762 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9763 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9764 ; RV64ZVE32F-NEXT: add a2, a0, a2
9765 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9766 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
9767 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9768 ; RV64ZVE32F-NEXT: .LBB84_2: # %else
9769 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9770 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_4
9771 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
9772 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9773 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9774 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9775 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9776 ; RV64ZVE32F-NEXT: add a2, a0, a2
9777 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9778 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9779 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
9780 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
9781 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
9782 ; RV64ZVE32F-NEXT: .LBB84_4: # %else2
9783 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
9784 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9785 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9786 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
9787 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9788 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
9789 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
9790 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9791 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
9792 ; RV64ZVE32F-NEXT: .LBB84_6: # %else8
9793 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9794 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_16
9795 ; RV64ZVE32F-NEXT: .LBB84_7: # %else11
9796 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9797 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
9798 ; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load13
9799 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9800 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9801 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9802 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9803 ; RV64ZVE32F-NEXT: add a2, a0, a2
9804 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9805 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9806 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9807 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
9808 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
9809 ; RV64ZVE32F-NEXT: .LBB84_9: # %else14
9810 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9811 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
9812 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9813 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_11
9814 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
9815 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9816 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9817 ; RV64ZVE32F-NEXT: add a2, a0, a2
9818 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9819 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9820 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9821 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
9822 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
9823 ; RV64ZVE32F-NEXT: .LBB84_11: # %else17
9824 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9825 ; RV64ZVE32F-NEXT: beqz a1, .LBB84_13
9826 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
9827 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9828 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9829 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9830 ; RV64ZVE32F-NEXT: slli a1, a1, 2
9831 ; RV64ZVE32F-NEXT: add a0, a0, a1
9832 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
9833 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9834 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9835 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9836 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
9837 ; RV64ZVE32F-NEXT: .LBB84_13: # %else20
9838 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9839 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
9840 ; RV64ZVE32F-NEXT: ret
9841 ; RV64ZVE32F-NEXT: .LBB84_14: # %cond.load4
9842 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9843 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9844 ; RV64ZVE32F-NEXT: add a2, a0, a2
9845 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9846 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9847 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9848 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
9849 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
9850 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9851 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
9852 ; RV64ZVE32F-NEXT: .LBB84_15: # %cond.load7
9853 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9854 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9855 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9856 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9857 ; RV64ZVE32F-NEXT: add a2, a0, a2
9858 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9859 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9860 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9861 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
9862 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
9863 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9864 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
9865 ; RV64ZVE32F-NEXT: .LBB84_16: # %cond.load10
9866 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9867 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9868 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9869 ; RV64ZVE32F-NEXT: add a2, a0, a2
9870 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9871 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
9872 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9873 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
9874 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
9875 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9876 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
9877 ; RV64ZVE32F-NEXT: j .LBB84_9
9878 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
9879 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
9880 ret <8 x float> %v
9881 }
9883 define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
9884 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f32:
9886 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
9887 ; RV32-NEXT: vsext.vf4 v12, v8
9888 ; RV32-NEXT: vsll.vi v8, v12, 2
9889 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
9890 ; RV32-NEXT: vmv.v.v v8, v10
9893 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f32:
9895 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9896 ; RV64V-NEXT: vsext.vf8 v12, v8
9897 ; RV64V-NEXT: vsll.vi v12, v12, 2
9898 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
9899 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
9900 ; RV64V-NEXT: vmv.v.v v8, v10
9903 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f32:
9904 ; RV64ZVE32F: # %bb.0:
9905 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9906 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9907 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9908 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_2
9909 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
9910 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9911 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9912 ; RV64ZVE32F-NEXT: add a2, a0, a2
9913 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9914 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
9915 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9916 ; RV64ZVE32F-NEXT: .LBB85_2: # %else
9917 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9918 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_4
9919 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
9920 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9921 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9922 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9923 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9924 ; RV64ZVE32F-NEXT: add a2, a0, a2
9925 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9926 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9927 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
9928 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
9929 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
9930 ; RV64ZVE32F-NEXT: .LBB85_4: # %else2
9931 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
9932 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9933 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9934 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
9935 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9936 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_14
9937 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
9938 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9939 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_15
9940 ; RV64ZVE32F-NEXT: .LBB85_6: # %else8
9941 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9942 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_16
9943 ; RV64ZVE32F-NEXT: .LBB85_7: # %else11
9944 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9945 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_9
9946 ; RV64ZVE32F-NEXT: .LBB85_8: # %cond.load13
9947 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9948 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9949 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9950 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9951 ; RV64ZVE32F-NEXT: add a2, a0, a2
9952 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9953 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9954 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9955 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
9956 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
9957 ; RV64ZVE32F-NEXT: .LBB85_9: # %else14
9958 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9959 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
9960 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9961 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_11
9962 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
9963 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9964 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9965 ; RV64ZVE32F-NEXT: add a2, a0, a2
9966 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9967 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9968 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9969 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
9970 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
9971 ; RV64ZVE32F-NEXT: .LBB85_11: # %else17
9972 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9973 ; RV64ZVE32F-NEXT: beqz a1, .LBB85_13
9974 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
9975 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9976 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9977 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9978 ; RV64ZVE32F-NEXT: slli a1, a1, 2
9979 ; RV64ZVE32F-NEXT: add a0, a0, a1
9980 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
9981 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9982 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9983 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9984 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
9985 ; RV64ZVE32F-NEXT: .LBB85_13: # %else20
9986 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9987 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
9988 ; RV64ZVE32F-NEXT: ret
9989 ; RV64ZVE32F-NEXT: .LBB85_14: # %cond.load4
9990 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9991 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9992 ; RV64ZVE32F-NEXT: add a2, a0, a2
9993 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9994 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9995 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9996 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
9997 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
9998 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9999 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_6
10000 ; RV64ZVE32F-NEXT: .LBB85_15: # %cond.load7
10001 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10002 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10003 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10004 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10005 ; RV64ZVE32F-NEXT: add a2, a0, a2
10006 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10007 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10008 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10009 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
10010 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
10011 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10012 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_7
10013 ; RV64ZVE32F-NEXT: .LBB85_16: # %cond.load10
10014 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10015 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10016 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10017 ; RV64ZVE32F-NEXT: add a2, a0, a2
10018 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10019 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
10020 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10021 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
10022 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
10023 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10024 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_8
10025 ; RV64ZVE32F-NEXT: j .LBB85_9
10026 %eidxs = sext <8 x i8> %idxs to <8 x i32>
10027 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
10028 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10029 ret <8 x float> %v
10030 }
10032 define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10033 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32:
10035 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
10036 ; RV32-NEXT: vzext.vf2 v9, v8
10037 ; RV32-NEXT: vsll.vi v8, v9, 2
10038 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
10039 ; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t
10040 ; RV32-NEXT: vmv.v.v v8, v10
10043 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32:
10045 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
10046 ; RV64V-NEXT: vzext.vf2 v9, v8
10047 ; RV64V-NEXT: vsll.vi v8, v9, 2
10048 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
10049 ; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t
10050 ; RV64V-NEXT: vmv.v.v v8, v10
10053 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f32:
10054 ; RV64ZVE32F: # %bb.0:
10055 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10056 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10057 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10058 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_2
10059 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10060 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10061 ; RV64ZVE32F-NEXT: andi a2, a2, 255
10062 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10063 ; RV64ZVE32F-NEXT: add a2, a0, a2
10064 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10065 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
10066 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
10067 ; RV64ZVE32F-NEXT: .LBB86_2: # %else
10068 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10069 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_4
10070 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10072 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10073 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10074 ; RV64ZVE32F-NEXT: andi a2, a2, 255
10075 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10076 ; RV64ZVE32F-NEXT: add a2, a0, a2
10077 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10078 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10079 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
10080 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
10081 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
10082 ; RV64ZVE32F-NEXT: .LBB86_4: # %else2
10083 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10084 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10085 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10086 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10087 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10088 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_14
10089 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10090 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10091 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_15
10092 ; RV64ZVE32F-NEXT: .LBB86_6: # %else8
10093 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10094 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_16
10095 ; RV64ZVE32F-NEXT: .LBB86_7: # %else11
10096 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10097 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_9
10098 ; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load13
10099 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10100 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10101 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10102 ; RV64ZVE32F-NEXT: andi a2, a2, 255
10103 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10104 ; RV64ZVE32F-NEXT: add a2, a0, a2
10105 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10106 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10107 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10108 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
10109 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
10110 ; RV64ZVE32F-NEXT: .LBB86_9: # %else14
10111 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10112 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10113 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10114 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_11
10115 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10116 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10117 ; RV64ZVE32F-NEXT: andi a2, a2, 255
10118 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10119 ; RV64ZVE32F-NEXT: add a2, a0, a2
10120 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10121 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10122 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10123 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
10124 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
10125 ; RV64ZVE32F-NEXT: .LBB86_11: # %else17
10126 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10127 ; RV64ZVE32F-NEXT: beqz a1, .LBB86_13
10128 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10129 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10130 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10131 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10132 ; RV64ZVE32F-NEXT: andi a1, a1, 255
10133 ; RV64ZVE32F-NEXT: slli a1, a1, 2
10134 ; RV64ZVE32F-NEXT: add a0, a0, a1
10135 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
10136 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10137 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10138 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10139 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
10140 ; RV64ZVE32F-NEXT: .LBB86_13: # %else20
10141 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10142 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
10143 ; RV64ZVE32F-NEXT: ret
10144 ; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load4
10145 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10146 ; RV64ZVE32F-NEXT: andi a2, a2, 255
10147 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10148 ; RV64ZVE32F-NEXT: add a2, a0, a2
10149 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10150 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10151 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10152 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
10153 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
10154 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10155 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_6
10156 ; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load7
10157 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10158 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10159 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10160 ; RV64ZVE32F-NEXT: andi a2, a2, 255
10161 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10162 ; RV64ZVE32F-NEXT: add a2, a0, a2
10163 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10164 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10165 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10166 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
10167 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
10168 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10169 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_7
10170 ; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load10
10171 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10172 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10173 ; RV64ZVE32F-NEXT: andi a2, a2, 255
10174 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10175 ; RV64ZVE32F-NEXT: add a2, a0, a2
10176 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10177 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
10178 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10179 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
10180 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
10181 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10182 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_8
10183 ; RV64ZVE32F-NEXT: j .LBB86_9
10184 %eidxs = zext <8 x i8> %idxs to <8 x i32>
10185 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
10186 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10187 ret <8 x float> %v
10188 }
10190 define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10191 ; RV32-LABEL: mgather_baseidx_v8i16_v8f32:
10193 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
10194 ; RV32-NEXT: vsext.vf2 v12, v8
10195 ; RV32-NEXT: vsll.vi v8, v12, 2
10196 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
10197 ; RV32-NEXT: vmv.v.v v8, v10
10200 ; RV64V-LABEL: mgather_baseidx_v8i16_v8f32:
10202 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10203 ; RV64V-NEXT: vsext.vf4 v12, v8
10204 ; RV64V-NEXT: vsll.vi v12, v12, 2
10205 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
10206 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
10207 ; RV64V-NEXT: vmv.v.v v8, v10
10210 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f32:
10211 ; RV64ZVE32F: # %bb.0:
10212 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10213 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10214 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10215 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_2
10216 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10217 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
10218 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10219 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10220 ; RV64ZVE32F-NEXT: add a2, a0, a2
10221 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10222 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
10223 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
10224 ; RV64ZVE32F-NEXT: .LBB87_2: # %else
10225 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10226 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_4
10227 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10228 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10229 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10230 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10231 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10232 ; RV64ZVE32F-NEXT: add a2, a0, a2
10233 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10234 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10235 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
10236 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
10237 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
10238 ; RV64ZVE32F-NEXT: .LBB87_4: # %else2
10239 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
10240 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10241 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10242 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
10243 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10244 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_14
10245 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10246 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10247 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
10248 ; RV64ZVE32F-NEXT: .LBB87_6: # %else8
10249 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10250 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_16
10251 ; RV64ZVE32F-NEXT: .LBB87_7: # %else11
10252 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10253 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
10254 ; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13
10255 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10256 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10257 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10258 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10259 ; RV64ZVE32F-NEXT: add a2, a0, a2
10260 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10261 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10262 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10263 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
10264 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
10265 ; RV64ZVE32F-NEXT: .LBB87_9: # %else14
10266 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10267 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
10268 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10269 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_11
10270 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10271 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10272 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10273 ; RV64ZVE32F-NEXT: add a2, a0, a2
10274 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10275 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10276 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10277 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
10278 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
10279 ; RV64ZVE32F-NEXT: .LBB87_11: # %else17
10280 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10281 ; RV64ZVE32F-NEXT: beqz a1, .LBB87_13
10282 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10283 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10284 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10285 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10286 ; RV64ZVE32F-NEXT: slli a1, a1, 2
10287 ; RV64ZVE32F-NEXT: add a0, a0, a1
10288 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
10289 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10290 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10291 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10292 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
10293 ; RV64ZVE32F-NEXT: .LBB87_13: # %else20
10294 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10295 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
10296 ; RV64ZVE32F-NEXT: ret
10297 ; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4
10298 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10299 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10300 ; RV64ZVE32F-NEXT: add a2, a0, a2
10301 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10302 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10303 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10304 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
10305 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
10306 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10307 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
10308 ; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7
10309 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10310 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10311 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10312 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10313 ; RV64ZVE32F-NEXT: add a2, a0, a2
10314 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10315 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10316 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10317 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
10318 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
10319 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10320 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_7
10321 ; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10
10322 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
10323 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10324 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10325 ; RV64ZVE32F-NEXT: add a2, a0, a2
10326 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10327 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
10328 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10329 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
10330 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
10331 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10332 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_8
10333 ; RV64ZVE32F-NEXT: j .LBB87_9
10334 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
10335 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10336 ret <8 x float> %v
10337 }
10339 define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10340 ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f32:
10342 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
10343 ; RV32-NEXT: vsext.vf2 v12, v8
10344 ; RV32-NEXT: vsll.vi v8, v12, 2
10345 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
10346 ; RV32-NEXT: vmv.v.v v8, v10
10349 ; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f32:
10351 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10352 ; RV64V-NEXT: vsext.vf4 v12, v8
10353 ; RV64V-NEXT: vsll.vi v12, v12, 2
10354 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
10355 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
10356 ; RV64V-NEXT: vmv.v.v v8, v10
10359 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f32:
10360 ; RV64ZVE32F: # %bb.0:
10361 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10362 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10363 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10364 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_2
10365 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10366 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
10367 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10368 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10369 ; RV64ZVE32F-NEXT: add a2, a0, a2
10370 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10371 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
10372 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
10373 ; RV64ZVE32F-NEXT: .LBB88_2: # %else
10374 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10375 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_4
10376 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10377 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10378 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10379 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10380 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10381 ; RV64ZVE32F-NEXT: add a2, a0, a2
10382 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10383 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10384 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
10385 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
10386 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
10387 ; RV64ZVE32F-NEXT: .LBB88_4: # %else2
10388 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
10389 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10390 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10391 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
10392 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10393 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_14
10394 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10395 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10396 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
10397 ; RV64ZVE32F-NEXT: .LBB88_6: # %else8
10398 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10399 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_16
10400 ; RV64ZVE32F-NEXT: .LBB88_7: # %else11
10401 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10402 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
10403 ; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13
10404 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10405 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10406 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10407 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10408 ; RV64ZVE32F-NEXT: add a2, a0, a2
10409 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10410 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10411 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10412 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
10413 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
10414 ; RV64ZVE32F-NEXT: .LBB88_9: # %else14
10415 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10416 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
10417 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10418 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_11
10419 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10420 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10421 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10422 ; RV64ZVE32F-NEXT: add a2, a0, a2
10423 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10424 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10425 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10426 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
10427 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
10428 ; RV64ZVE32F-NEXT: .LBB88_11: # %else17
10429 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10430 ; RV64ZVE32F-NEXT: beqz a1, .LBB88_13
10431 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10432 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10433 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10434 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10435 ; RV64ZVE32F-NEXT: slli a1, a1, 2
10436 ; RV64ZVE32F-NEXT: add a0, a0, a1
10437 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
10438 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10439 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10440 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10441 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
10442 ; RV64ZVE32F-NEXT: .LBB88_13: # %else20
10443 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10444 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
10445 ; RV64ZVE32F-NEXT: ret
10446 ; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4
10447 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10448 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10449 ; RV64ZVE32F-NEXT: add a2, a0, a2
10450 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10451 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10452 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10453 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
10454 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
10455 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10456 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
10457 ; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7
10458 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10459 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10460 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10461 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10462 ; RV64ZVE32F-NEXT: add a2, a0, a2
10463 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10464 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10465 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10466 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
10467 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
10468 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10469 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_7
10470 ; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10
10471 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
10472 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10473 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10474 ; RV64ZVE32F-NEXT: add a2, a0, a2
10475 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10476 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
10477 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10478 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
10479 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
10480 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10481 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_8
10482 ; RV64ZVE32F-NEXT: j .LBB88_9
10483 %eidxs = sext <8 x i16> %idxs to <8 x i32>
10484 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
10485 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10486 ret <8 x float> %v
10487 }
10489 define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10490 ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f32:
10492 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
10493 ; RV32-NEXT: vzext.vf2 v12, v8
10494 ; RV32-NEXT: vsll.vi v8, v12, 2
10495 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
10496 ; RV32-NEXT: vmv.v.v v8, v10
10499 ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32:
10501 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
10502 ; RV64V-NEXT: vzext.vf2 v12, v8
10503 ; RV64V-NEXT: vsll.vi v8, v12, 2
10504 ; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t
10505 ; RV64V-NEXT: vmv.v.v v8, v10
10508 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f32:
10509 ; RV64ZVE32F: # %bb.0:
10510 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10511 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10512 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10513 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_2
10514 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10515 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
10516 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10517 ; RV64ZVE32F-NEXT: slli a2, a2, 48
10518 ; RV64ZVE32F-NEXT: srli a2, a2, 46
10519 ; RV64ZVE32F-NEXT: add a2, a0, a2
10520 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10521 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
10522 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
10523 ; RV64ZVE32F-NEXT: .LBB89_2: # %else
10524 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10525 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_4
10526 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10527 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10528 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10529 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10530 ; RV64ZVE32F-NEXT: slli a2, a2, 48
10531 ; RV64ZVE32F-NEXT: srli a2, a2, 46
10532 ; RV64ZVE32F-NEXT: add a2, a0, a2
10533 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10534 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10535 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
10536 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
10537 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
10538 ; RV64ZVE32F-NEXT: .LBB89_4: # %else2
10539 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
10540 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10541 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10542 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
10543 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10544 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_14
10545 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10546 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10547 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
10548 ; RV64ZVE32F-NEXT: .LBB89_6: # %else8
10549 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10550 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_16
10551 ; RV64ZVE32F-NEXT: .LBB89_7: # %else11
10552 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10553 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
10554 ; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13
10555 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10556 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10557 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10558 ; RV64ZVE32F-NEXT: slli a2, a2, 48
10559 ; RV64ZVE32F-NEXT: srli a2, a2, 46
10560 ; RV64ZVE32F-NEXT: add a2, a0, a2
10561 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10562 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10563 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10564 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
10565 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
10566 ; RV64ZVE32F-NEXT: .LBB89_9: # %else14
10567 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10568 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
10569 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10570 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_11
10571 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10572 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10573 ; RV64ZVE32F-NEXT: slli a2, a2, 48
10574 ; RV64ZVE32F-NEXT: srli a2, a2, 46
10575 ; RV64ZVE32F-NEXT: add a2, a0, a2
10576 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10577 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10578 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10579 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
10580 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
10581 ; RV64ZVE32F-NEXT: .LBB89_11: # %else17
10582 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10583 ; RV64ZVE32F-NEXT: beqz a1, .LBB89_13
10584 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10585 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10586 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10587 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10588 ; RV64ZVE32F-NEXT: slli a1, a1, 48
10589 ; RV64ZVE32F-NEXT: srli a1, a1, 46
10590 ; RV64ZVE32F-NEXT: add a0, a0, a1
10591 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
10592 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10593 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10594 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10595 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
10596 ; RV64ZVE32F-NEXT: .LBB89_13: # %else20
10597 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10598 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
10599 ; RV64ZVE32F-NEXT: ret
10600 ; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4
10601 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10602 ; RV64ZVE32F-NEXT: slli a2, a2, 48
10603 ; RV64ZVE32F-NEXT: srli a2, a2, 46
10604 ; RV64ZVE32F-NEXT: add a2, a0, a2
10605 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10606 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10607 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10608 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
10609 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
10610 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10611 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
10612 ; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
10613 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10614 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10615 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10616 ; RV64ZVE32F-NEXT: slli a2, a2, 48
10617 ; RV64ZVE32F-NEXT: srli a2, a2, 46
10618 ; RV64ZVE32F-NEXT: add a2, a0, a2
10619 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10620 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10621 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10622 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
10623 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
10624 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10625 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_7
10626 ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
10627 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
10628 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10629 ; RV64ZVE32F-NEXT: slli a2, a2, 48
10630 ; RV64ZVE32F-NEXT: srli a2, a2, 46
10631 ; RV64ZVE32F-NEXT: add a2, a0, a2
10632 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10633 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
10634 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10635 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
10636 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
10637 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10638 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_8
10639 ; RV64ZVE32F-NEXT: j .LBB89_9
10640 %eidxs = zext <8 x i16> %idxs to <8 x i32>
10641 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
10642 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10643 ret <8 x float> %v
10644 }
10646 define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x float> %passthru) {
10647 ; RV32-LABEL: mgather_baseidx_v8f32:
10649 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
10650 ; RV32-NEXT: vsll.vi v8, v8, 2
10651 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
10652 ; RV32-NEXT: vmv.v.v v8, v10
10655 ; RV64V-LABEL: mgather_baseidx_v8f32:
10657 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10658 ; RV64V-NEXT: vsext.vf2 v12, v8
10659 ; RV64V-NEXT: vsll.vi v12, v12, 2
10660 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
10661 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
10662 ; RV64V-NEXT: vmv.v.v v8, v10
10665 ; RV64ZVE32F-LABEL: mgather_baseidx_v8f32:
10666 ; RV64ZVE32F: # %bb.0:
10667 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10668 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10669 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10670 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_2
10671 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10672 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
10673 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10674 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10675 ; RV64ZVE32F-NEXT: add a2, a0, a2
10676 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10677 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
10678 ; RV64ZVE32F-NEXT: .LBB90_2: # %else
10679 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10680 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_4
10681 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10682 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
10683 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
10684 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10685 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10686 ; RV64ZVE32F-NEXT: add a2, a0, a2
10687 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10688 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10689 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
10690 ; RV64ZVE32F-NEXT: .LBB90_4: # %else2
10691 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
10692 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
10693 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10694 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
10695 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10696 ; RV64ZVE32F-NEXT: bnez a2, .LBB90_14
10697 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10698 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10699 ; RV64ZVE32F-NEXT: bnez a2, .LBB90_15
10700 ; RV64ZVE32F-NEXT: .LBB90_6: # %else8
10701 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10702 ; RV64ZVE32F-NEXT: bnez a2, .LBB90_16
10703 ; RV64ZVE32F-NEXT: .LBB90_7: # %else11
10704 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10705 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_9
10706 ; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13
10707 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10708 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
10709 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10710 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10711 ; RV64ZVE32F-NEXT: add a2, a0, a2
10712 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10713 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10714 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
10715 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
10716 ; RV64ZVE32F-NEXT: .LBB90_9: # %else14
10717 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10718 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
10719 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
10720 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_11
10721 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10722 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10723 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10724 ; RV64ZVE32F-NEXT: add a2, a0, a2
10725 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10726 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
10727 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
10728 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
10729 ; RV64ZVE32F-NEXT: .LBB90_11: # %else17
10730 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10731 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_13
10732 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10733 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10734 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10735 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10736 ; RV64ZVE32F-NEXT: slli a1, a1, 2
10737 ; RV64ZVE32F-NEXT: add a0, a0, a1
10738 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
10739 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10740 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10741 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
10742 ; RV64ZVE32F-NEXT: .LBB90_13: # %else20
10743 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10744 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
10745 ; RV64ZVE32F-NEXT: ret
10746 ; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load4
10747 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10748 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10749 ; RV64ZVE32F-NEXT: add a2, a0, a2
10750 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10751 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
10752 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
10753 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
10754 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10755 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_6
10756 ; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
10757 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
10758 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10759 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10760 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10761 ; RV64ZVE32F-NEXT: add a2, a0, a2
10762 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10763 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10764 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
10765 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10766 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_7
10767 ; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
10768 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
10769 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10770 ; RV64ZVE32F-NEXT: slli a2, a2, 2
10771 ; RV64ZVE32F-NEXT: add a2, a0, a2
10772 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
10773 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
10774 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
10775 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10776 ; RV64ZVE32F-NEXT: bnez a2, .LBB90_8
10777 ; RV64ZVE32F-NEXT: j .LBB90_9
10778 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
10779 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
10780 ret <8 x float> %v
10781 }
10783 declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
10785 define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %passthru) {
10786 ; RV32V-LABEL: mgather_v1f64:
10788 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
10789 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
10790 ; RV32V-NEXT: vmv.v.v v8, v9
10793 ; RV64V-LABEL: mgather_v1f64:
10795 ; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
10796 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
10797 ; RV64V-NEXT: vmv.v.v v8, v9
10800 ; RV32ZVE32F-LABEL: mgather_v1f64:
10801 ; RV32ZVE32F: # %bb.0:
10802 ; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
10803 ; RV32ZVE32F-NEXT: vfirst.m a0, v0
10804 ; RV32ZVE32F-NEXT: bnez a0, .LBB91_2
10805 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
10806 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10807 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10808 ; RV32ZVE32F-NEXT: fld fa0, 0(a0)
10809 ; RV32ZVE32F-NEXT: .LBB91_2: # %else
10810 ; RV32ZVE32F-NEXT: ret
10812 ; RV64ZVE32F-LABEL: mgather_v1f64:
10813 ; RV64ZVE32F: # %bb.0:
10814 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
10815 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
10816 ; RV64ZVE32F-NEXT: bnez a1, .LBB91_2
10817 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10818 ; RV64ZVE32F-NEXT: fld fa0, 0(a0)
10819 ; RV64ZVE32F-NEXT: .LBB91_2: # %else
10820 ; RV64ZVE32F-NEXT: ret
10821 %v = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x double> %passthru)
10822 ret <1 x double> %v
10823 }
10825 declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
10827 define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %passthru) {
10828 ; RV32V-LABEL: mgather_v2f64:
10830 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
10831 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
10832 ; RV32V-NEXT: vmv.v.v v8, v9
10835 ; RV64V-LABEL: mgather_v2f64:
10837 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
10838 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
10839 ; RV64V-NEXT: vmv.v.v v8, v9
10842 ; RV32ZVE32F-LABEL: mgather_v2f64:
10843 ; RV32ZVE32F: # %bb.0:
10844 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10845 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
10846 ; RV32ZVE32F-NEXT: andi a1, a0, 1
10847 ; RV32ZVE32F-NEXT: bnez a1, .LBB92_3
10848 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10849 ; RV32ZVE32F-NEXT: andi a0, a0, 2
10850 ; RV32ZVE32F-NEXT: bnez a0, .LBB92_4
10851 ; RV32ZVE32F-NEXT: .LBB92_2: # %else2
10852 ; RV32ZVE32F-NEXT: ret
10853 ; RV32ZVE32F-NEXT: .LBB92_3: # %cond.load
10854 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
10855 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10856 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
10857 ; RV32ZVE32F-NEXT: andi a0, a0, 2
10858 ; RV32ZVE32F-NEXT: beqz a0, .LBB92_2
10859 ; RV32ZVE32F-NEXT: .LBB92_4: # %cond.load1
10860 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10861 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10862 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10863 ; RV32ZVE32F-NEXT: fld fa1, 0(a0)
10864 ; RV32ZVE32F-NEXT: ret
10866 ; RV64ZVE32F-LABEL: mgather_v2f64:
10867 ; RV64ZVE32F: # %bb.0:
10868 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10869 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
10870 ; RV64ZVE32F-NEXT: andi a3, a2, 1
10871 ; RV64ZVE32F-NEXT: bnez a3, .LBB92_3
10872 ; RV64ZVE32F-NEXT: # %bb.1: # %else
10873 ; RV64ZVE32F-NEXT: andi a2, a2, 2
10874 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_4
10875 ; RV64ZVE32F-NEXT: .LBB92_2: # %else2
10876 ; RV64ZVE32F-NEXT: ret
10877 ; RV64ZVE32F-NEXT: .LBB92_3: # %cond.load
10878 ; RV64ZVE32F-NEXT: fld fa0, 0(a0)
10879 ; RV64ZVE32F-NEXT: andi a2, a2, 2
10880 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
10881 ; RV64ZVE32F-NEXT: .LBB92_4: # %cond.load1
10882 ; RV64ZVE32F-NEXT: fld fa1, 0(a1)
10883 ; RV64ZVE32F-NEXT: ret
10884 %v = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x double> %passthru)
10885 ret <2 x double> %v
10886 }
10888 declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
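; <4 x double> gather; the result is returned indirectly through a0 on the ZVE32F runs.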
10890 define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %passthru) {
10891 ; RV32V-LABEL: mgather_v4f64:
10893 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
10894 ; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t
10895 ; RV32V-NEXT: vmv.v.v v8, v10
10898 ; RV64V-LABEL: mgather_v4f64:
10900 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
10901 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
10902 ; RV64V-NEXT: vmv.v.v v8, v10
10905 ; RV32ZVE32F-LABEL: mgather_v4f64:
10906 ; RV32ZVE32F: # %bb.0:
10907 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10908 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10909 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10910 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_6
10911 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10912 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10913 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_7
10914 ; RV32ZVE32F-NEXT: .LBB93_2: # %else2
10915 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10916 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_8
10917 ; RV32ZVE32F-NEXT: .LBB93_3: # %else5
10918 ; RV32ZVE32F-NEXT: andi a1, a1, 8
10919 ; RV32ZVE32F-NEXT: beqz a1, .LBB93_5
10920 ; RV32ZVE32F-NEXT: .LBB93_4: # %cond.load7
10921 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10922 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
10923 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10924 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
10925 ; RV32ZVE32F-NEXT: .LBB93_5: # %else8
10926 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10927 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
10928 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
10929 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
10930 ; RV32ZVE32F-NEXT: ret
10931 ; RV32ZVE32F-NEXT: .LBB93_6: # %cond.load
10932 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
10933 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
10934 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
10935 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10936 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_2
10937 ; RV32ZVE32F-NEXT: .LBB93_7: # %cond.load1
10938 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10939 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10940 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
10941 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
10942 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10943 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_3
10944 ; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load4
10945 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10946 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
10947 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
10948 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
10949 ; RV32ZVE32F-NEXT: andi a1, a1, 8
10950 ; RV32ZVE32F-NEXT: bnez a1, .LBB93_4
10951 ; RV32ZVE32F-NEXT: j .LBB93_5
10953 ; RV64ZVE32F-LABEL: mgather_v4f64:
10954 ; RV64ZVE32F: # %bb.0:
10955 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10956 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
10957 ; RV64ZVE32F-NEXT: andi a3, a2, 1
10958 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_6
10959 ; RV64ZVE32F-NEXT: # %bb.1: # %else
10960 ; RV64ZVE32F-NEXT: andi a3, a2, 2
10961 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_7
10962 ; RV64ZVE32F-NEXT: .LBB93_2: # %else2
10963 ; RV64ZVE32F-NEXT: andi a3, a2, 4
10964 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_8
10965 ; RV64ZVE32F-NEXT: .LBB93_3: # %else5
10966 ; RV64ZVE32F-NEXT: andi a2, a2, 8
10967 ; RV64ZVE32F-NEXT: beqz a2, .LBB93_5
10968 ; RV64ZVE32F-NEXT: .LBB93_4: # %cond.load7
10969 ; RV64ZVE32F-NEXT: ld a1, 24(a1)
10970 ; RV64ZVE32F-NEXT: fld fa3, 0(a1)
10971 ; RV64ZVE32F-NEXT: .LBB93_5: # %else8
10972 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
10973 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
10974 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
10975 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
10976 ; RV64ZVE32F-NEXT: ret
10977 ; RV64ZVE32F-NEXT: .LBB93_6: # %cond.load
10978 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
10979 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
10980 ; RV64ZVE32F-NEXT: andi a3, a2, 2
10981 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_2
10982 ; RV64ZVE32F-NEXT: .LBB93_7: # %cond.load1
10983 ; RV64ZVE32F-NEXT: ld a3, 8(a1)
10984 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
10985 ; RV64ZVE32F-NEXT: andi a3, a2, 4
10986 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_3
10987 ; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load4
10988 ; RV64ZVE32F-NEXT: ld a3, 16(a1)
10989 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
10990 ; RV64ZVE32F-NEXT: andi a2, a2, 8
10991 ; RV64ZVE32F-NEXT: bnez a2, .LBB93_4
10992 ; RV64ZVE32F-NEXT: j .LBB93_5
10993 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru)
10994 ret <4 x double> %v
10995 }
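; All-ones mask: the gather is unconditional, so no per-element mask checks are emitted.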
10997 define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
10998 ; RV32V-LABEL: mgather_truemask_v4f64:
11000 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
11001 ; RV32V-NEXT: vluxei32.v v10, (zero), v8
11002 ; RV32V-NEXT: vmv.v.v v8, v10
11005 ; RV64V-LABEL: mgather_truemask_v4f64:
11007 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
11008 ; RV64V-NEXT: vluxei64.v v8, (zero), v8
11011 ; RV32ZVE32F-LABEL: mgather_truemask_v4f64:
11012 ; RV32ZVE32F: # %bb.0:
11013 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11014 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11015 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
11016 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
11017 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
11018 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
11019 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
11020 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
11021 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
11022 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
11023 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11024 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
11025 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
11026 ; RV32ZVE32F-NEXT: fsd fa4, 8(a0)
11027 ; RV32ZVE32F-NEXT: fsd fa3, 16(a0)
11028 ; RV32ZVE32F-NEXT: fsd fa2, 24(a0)
11029 ; RV32ZVE32F-NEXT: ret
11031 ; RV64ZVE32F-LABEL: mgather_truemask_v4f64:
11032 ; RV64ZVE32F: # %bb.0:
11033 ; RV64ZVE32F-NEXT: ld a2, 0(a1)
11034 ; RV64ZVE32F-NEXT: ld a3, 8(a1)
11035 ; RV64ZVE32F-NEXT: ld a4, 16(a1)
11036 ; RV64ZVE32F-NEXT: ld a1, 24(a1)
11037 ; RV64ZVE32F-NEXT: fld fa5, 0(a2)
11038 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11039 ; RV64ZVE32F-NEXT: fld fa3, 0(a4)
11040 ; RV64ZVE32F-NEXT: fld fa2, 0(a1)
11041 ; RV64ZVE32F-NEXT: fsd fa5, 0(a0)
11042 ; RV64ZVE32F-NEXT: fsd fa4, 8(a0)
11043 ; RV64ZVE32F-NEXT: fsd fa3, 16(a0)
11044 ; RV64ZVE32F-NEXT: fsd fa2, 24(a0)
11045 ; RV64ZVE32F-NEXT: ret
11046 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x double> %passthru)
11047 ret <4 x double> %v
11048 }
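; All-zeros mask: nothing is loaded and the passthru value is returned unchanged.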
11050 define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
11051 ; RV32V-LABEL: mgather_falsemask_v4f64:
11053 ; RV32V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11054 ; RV32V-NEXT: vmv2r.v v8, v10
11057 ; RV64V-LABEL: mgather_falsemask_v4f64:
11059 ; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11060 ; RV64V-NEXT: vmv2r.v v8, v10
11063 ; RV32ZVE32F-LABEL: mgather_falsemask_v4f64:
11064 ; RV32ZVE32F: # %bb.0:
11065 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11066 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11067 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11068 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11069 ; RV32ZVE32F-NEXT: ret
11071 ; RV64ZVE32F-LABEL: mgather_falsemask_v4f64:
11072 ; RV64ZVE32F: # %bb.0:
11073 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11074 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11075 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11076 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11077 ; RV64ZVE32F-NEXT: ret
11078 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x double> %passthru)
11079 ret <4 x double> %v
11080 }
11082 declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
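; <8 x double> gather with an explicit pointer vector (no base + index addressing).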
11084 define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %passthru) {
11085 ; RV32V-LABEL: mgather_v8f64:
11087 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11088 ; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t
11089 ; RV32V-NEXT: vmv.v.v v8, v12
11092 ; RV64V-LABEL: mgather_v8f64:
11094 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11095 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
11096 ; RV64V-NEXT: vmv.v.v v8, v12
11099 ; RV32ZVE32F-LABEL: mgather_v8f64:
11100 ; RV32ZVE32F: # %bb.0:
11101 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11102 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
11103 ; RV32ZVE32F-NEXT: andi a2, a1, 1
11104 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_10
11105 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11106 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11107 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_11
11108 ; RV32ZVE32F-NEXT: .LBB96_2: # %else2
11109 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11110 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_12
11111 ; RV32ZVE32F-NEXT: .LBB96_3: # %else5
11112 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11113 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_13
11114 ; RV32ZVE32F-NEXT: .LBB96_4: # %else8
11115 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11116 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_14
11117 ; RV32ZVE32F-NEXT: .LBB96_5: # %else11
11118 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11119 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_15
11120 ; RV32ZVE32F-NEXT: .LBB96_6: # %else14
11121 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11122 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_16
11123 ; RV32ZVE32F-NEXT: .LBB96_7: # %else17
11124 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11125 ; RV32ZVE32F-NEXT: beqz a1, .LBB96_9
11126 ; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19
11127 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11128 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11129 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11130 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11131 ; RV32ZVE32F-NEXT: .LBB96_9: # %else20
11132 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11133 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11134 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11135 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11136 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11137 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11138 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11139 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11140 ; RV32ZVE32F-NEXT: ret
11141 ; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load
11142 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
11143 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
11144 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
11145 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11146 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_2
11147 ; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1
11148 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11149 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11150 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11151 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
11152 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11153 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_3
11154 ; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4
11155 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11156 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
11157 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11158 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
11159 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11160 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_4
11161 ; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7
11162 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11163 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
11164 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11165 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
11166 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11167 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_5
11168 ; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10
11169 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11170 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11171 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11172 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
11173 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11174 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_6
11175 ; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13
11176 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11177 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
11178 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11179 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
11180 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11181 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_7
11182 ; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16
11183 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11184 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
11185 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11186 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
11187 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11188 ; RV32ZVE32F-NEXT: bnez a1, .LBB96_8
11189 ; RV32ZVE32F-NEXT: j .LBB96_9
11191 ; RV64ZVE32F-LABEL: mgather_v8f64:
11192 ; RV64ZVE32F: # %bb.0:
11193 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11194 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
11195 ; RV64ZVE32F-NEXT: andi a3, a2, 1
11196 ; RV64ZVE32F-NEXT: bnez a3, .LBB96_10
11197 ; RV64ZVE32F-NEXT: # %bb.1: # %else
11198 ; RV64ZVE32F-NEXT: andi a3, a2, 2
11199 ; RV64ZVE32F-NEXT: bnez a3, .LBB96_11
11200 ; RV64ZVE32F-NEXT: .LBB96_2: # %else2
11201 ; RV64ZVE32F-NEXT: andi a3, a2, 4
11202 ; RV64ZVE32F-NEXT: bnez a3, .LBB96_12
11203 ; RV64ZVE32F-NEXT: .LBB96_3: # %else5
11204 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11205 ; RV64ZVE32F-NEXT: bnez a3, .LBB96_13
11206 ; RV64ZVE32F-NEXT: .LBB96_4: # %else8
11207 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11208 ; RV64ZVE32F-NEXT: bnez a3, .LBB96_14
11209 ; RV64ZVE32F-NEXT: .LBB96_5: # %else11
11210 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11211 ; RV64ZVE32F-NEXT: bnez a3, .LBB96_15
11212 ; RV64ZVE32F-NEXT: .LBB96_6: # %else14
11213 ; RV64ZVE32F-NEXT: andi a3, a2, 64
11214 ; RV64ZVE32F-NEXT: bnez a3, .LBB96_16
11215 ; RV64ZVE32F-NEXT: .LBB96_7: # %else17
11216 ; RV64ZVE32F-NEXT: andi a2, a2, -128
11217 ; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
11218 ; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19
11219 ; RV64ZVE32F-NEXT: ld a1, 56(a1)
11220 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
11221 ; RV64ZVE32F-NEXT: .LBB96_9: # %else20
11222 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11223 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11224 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11225 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11226 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
11227 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
11228 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11229 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11230 ; RV64ZVE32F-NEXT: ret
11231 ; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load
11232 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
11233 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
11234 ; RV64ZVE32F-NEXT: andi a3, a2, 2
11235 ; RV64ZVE32F-NEXT: beqz a3, .LBB96_2
11236 ; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1
11237 ; RV64ZVE32F-NEXT: ld a3, 8(a1)
11238 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
11239 ; RV64ZVE32F-NEXT: andi a3, a2, 4
11240 ; RV64ZVE32F-NEXT: beqz a3, .LBB96_3
11241 ; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4
11242 ; RV64ZVE32F-NEXT: ld a3, 16(a1)
11243 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
11244 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11245 ; RV64ZVE32F-NEXT: beqz a3, .LBB96_4
11246 ; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7
11247 ; RV64ZVE32F-NEXT: ld a3, 24(a1)
11248 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
11249 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11250 ; RV64ZVE32F-NEXT: beqz a3, .LBB96_5
11251 ; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10
11252 ; RV64ZVE32F-NEXT: ld a3, 32(a1)
11253 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11254 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11255 ; RV64ZVE32F-NEXT: beqz a3, .LBB96_6
11256 ; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13
11257 ; RV64ZVE32F-NEXT: ld a3, 40(a1)
11258 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
11259 ; RV64ZVE32F-NEXT: andi a3, a2, 64
11260 ; RV64ZVE32F-NEXT: beqz a3, .LBB96_7
11261 ; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16
11262 ; RV64ZVE32F-NEXT: ld a3, 48(a1)
11263 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
11264 ; RV64ZVE32F-NEXT: andi a2, a2, -128
11265 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_8
11266 ; RV64ZVE32F-NEXT: j .LBB96_9
11267 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11268 ret <8 x double> %v
11269 }
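; <8 x double> gather addressed as base + i8 indices; the index is scaled by 8 (slli 3) on the scalarized paths.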
11271 define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11272 ; RV32V-LABEL: mgather_baseidx_v8i8_v8f64:
11274 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11275 ; RV32V-NEXT: vsext.vf4 v10, v8
11276 ; RV32V-NEXT: vsll.vi v8, v10, 3
11277 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11278 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11279 ; RV32V-NEXT: vmv.v.v v8, v12
11282 ; RV64V-LABEL: mgather_baseidx_v8i8_v8f64:
11284 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11285 ; RV64V-NEXT: vsext.vf8 v16, v8
11286 ; RV64V-NEXT: vsll.vi v8, v16, 3
11287 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
11288 ; RV64V-NEXT: vmv.v.v v8, v12
11291 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
11292 ; RV32ZVE32F: # %bb.0:
11293 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11294 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
11295 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
11296 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
11297 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
11298 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
11299 ; RV32ZVE32F-NEXT: andi a3, a2, 1
11300 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11301 ; RV32ZVE32F-NEXT: bnez a3, .LBB97_10
11302 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11303 ; RV32ZVE32F-NEXT: andi a1, a2, 2
11304 ; RV32ZVE32F-NEXT: bnez a1, .LBB97_11
11305 ; RV32ZVE32F-NEXT: .LBB97_2: # %else2
11306 ; RV32ZVE32F-NEXT: andi a1, a2, 4
11307 ; RV32ZVE32F-NEXT: bnez a1, .LBB97_12
11308 ; RV32ZVE32F-NEXT: .LBB97_3: # %else5
11309 ; RV32ZVE32F-NEXT: andi a1, a2, 8
11310 ; RV32ZVE32F-NEXT: bnez a1, .LBB97_13
11311 ; RV32ZVE32F-NEXT: .LBB97_4: # %else8
11312 ; RV32ZVE32F-NEXT: andi a1, a2, 16
11313 ; RV32ZVE32F-NEXT: bnez a1, .LBB97_14
11314 ; RV32ZVE32F-NEXT: .LBB97_5: # %else11
11315 ; RV32ZVE32F-NEXT: andi a1, a2, 32
11316 ; RV32ZVE32F-NEXT: bnez a1, .LBB97_15
11317 ; RV32ZVE32F-NEXT: .LBB97_6: # %else14
11318 ; RV32ZVE32F-NEXT: andi a1, a2, 64
11319 ; RV32ZVE32F-NEXT: bnez a1, .LBB97_16
11320 ; RV32ZVE32F-NEXT: .LBB97_7: # %else17
11321 ; RV32ZVE32F-NEXT: andi a1, a2, -128
11322 ; RV32ZVE32F-NEXT: beqz a1, .LBB97_9
11323 ; RV32ZVE32F-NEXT: .LBB97_8: # %cond.load19
11324 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11325 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11326 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11327 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11328 ; RV32ZVE32F-NEXT: .LBB97_9: # %else20
11329 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11330 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11331 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11332 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11333 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11334 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11335 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11336 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11337 ; RV32ZVE32F-NEXT: ret
11338 ; RV32ZVE32F-NEXT: .LBB97_10: # %cond.load
11339 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11340 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
11341 ; RV32ZVE32F-NEXT: andi a1, a2, 2
11342 ; RV32ZVE32F-NEXT: beqz a1, .LBB97_2
11343 ; RV32ZVE32F-NEXT: .LBB97_11: # %cond.load1
11344 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11345 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11346 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11347 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
11348 ; RV32ZVE32F-NEXT: andi a1, a2, 4
11349 ; RV32ZVE32F-NEXT: beqz a1, .LBB97_3
11350 ; RV32ZVE32F-NEXT: .LBB97_12: # %cond.load4
11351 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11352 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
11353 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11354 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
11355 ; RV32ZVE32F-NEXT: andi a1, a2, 8
11356 ; RV32ZVE32F-NEXT: beqz a1, .LBB97_4
11357 ; RV32ZVE32F-NEXT: .LBB97_13: # %cond.load7
11358 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11359 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
11360 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11361 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
11362 ; RV32ZVE32F-NEXT: andi a1, a2, 16
11363 ; RV32ZVE32F-NEXT: beqz a1, .LBB97_5
11364 ; RV32ZVE32F-NEXT: .LBB97_14: # %cond.load10
11365 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11366 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11367 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11368 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
11369 ; RV32ZVE32F-NEXT: andi a1, a2, 32
11370 ; RV32ZVE32F-NEXT: beqz a1, .LBB97_6
11371 ; RV32ZVE32F-NEXT: .LBB97_15: # %cond.load13
11372 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11373 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
11374 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11375 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
11376 ; RV32ZVE32F-NEXT: andi a1, a2, 64
11377 ; RV32ZVE32F-NEXT: beqz a1, .LBB97_7
11378 ; RV32ZVE32F-NEXT: .LBB97_16: # %cond.load16
11379 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11380 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
11381 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11382 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
11383 ; RV32ZVE32F-NEXT: andi a1, a2, -128
11384 ; RV32ZVE32F-NEXT: bnez a1, .LBB97_8
11385 ; RV32ZVE32F-NEXT: j .LBB97_9
11387 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
11388 ; RV64ZVE32F: # %bb.0:
11389 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11390 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
11391 ; RV64ZVE32F-NEXT: andi a3, a2, 1
11392 ; RV64ZVE32F-NEXT: beqz a3, .LBB97_2
11393 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
11394 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11395 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11396 ; RV64ZVE32F-NEXT: add a3, a1, a3
11397 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
11398 ; RV64ZVE32F-NEXT: .LBB97_2: # %else
11399 ; RV64ZVE32F-NEXT: andi a3, a2, 2
11400 ; RV64ZVE32F-NEXT: beqz a3, .LBB97_4
11401 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
11402 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11403 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
11404 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
11405 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11406 ; RV64ZVE32F-NEXT: add a3, a1, a3
11407 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
11408 ; RV64ZVE32F-NEXT: .LBB97_4: # %else2
11409 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
11410 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
11411 ; RV64ZVE32F-NEXT: andi a3, a2, 4
11412 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
11413 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
11414 ; RV64ZVE32F-NEXT: bnez a3, .LBB97_14
11415 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
11416 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11417 ; RV64ZVE32F-NEXT: bnez a3, .LBB97_15
11418 ; RV64ZVE32F-NEXT: .LBB97_6: # %else8
11419 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11420 ; RV64ZVE32F-NEXT: bnez a3, .LBB97_16
11421 ; RV64ZVE32F-NEXT: .LBB97_7: # %else11
11422 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11423 ; RV64ZVE32F-NEXT: beqz a3, .LBB97_9
11424 ; RV64ZVE32F-NEXT: .LBB97_8: # %cond.load13
11425 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
11426 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11427 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11428 ; RV64ZVE32F-NEXT: add a3, a1, a3
11429 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
11430 ; RV64ZVE32F-NEXT: .LBB97_9: # %else14
11431 ; RV64ZVE32F-NEXT: andi a3, a2, 64
11432 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
11433 ; RV64ZVE32F-NEXT: beqz a3, .LBB97_11
11434 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
11435 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11436 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11437 ; RV64ZVE32F-NEXT: add a3, a1, a3
11438 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
11439 ; RV64ZVE32F-NEXT: .LBB97_11: # %else17
11440 ; RV64ZVE32F-NEXT: andi a2, a2, -128
11441 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_13
11442 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
11443 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11444 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
11445 ; RV64ZVE32F-NEXT: slli a2, a2, 3
11446 ; RV64ZVE32F-NEXT: add a1, a1, a2
11447 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
11448 ; RV64ZVE32F-NEXT: .LBB97_13: # %else20
11449 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11450 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11451 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11452 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11453 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
11454 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
11455 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11456 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11457 ; RV64ZVE32F-NEXT: ret
11458 ; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load4
11459 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11460 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11461 ; RV64ZVE32F-NEXT: add a3, a1, a3
11462 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
11463 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11464 ; RV64ZVE32F-NEXT: beqz a3, .LBB97_6
11465 ; RV64ZVE32F-NEXT: .LBB97_15: # %cond.load7
11466 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11467 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11468 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11469 ; RV64ZVE32F-NEXT: add a3, a1, a3
11470 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
11471 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11472 ; RV64ZVE32F-NEXT: beqz a3, .LBB97_7
11473 ; RV64ZVE32F-NEXT: .LBB97_16: # %cond.load10
11474 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
11475 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11476 ; RV64ZVE32F-NEXT: add a3, a1, a3
11477 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11478 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11479 ; RV64ZVE32F-NEXT: bnez a3, .LBB97_8
11480 ; RV64ZVE32F-NEXT: j .LBB97_9
11481 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
11482 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11483 ret <8 x double> %v
11484 }
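; Same addressing as above, but the i8 indices are explicitly sign-extended to i64 before the GEP.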
11486 define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11487 ; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
11489 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11490 ; RV32V-NEXT: vsext.vf4 v10, v8
11491 ; RV32V-NEXT: vsll.vi v8, v10, 3
11492 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11493 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11494 ; RV32V-NEXT: vmv.v.v v8, v12
11497 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
11499 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11500 ; RV64V-NEXT: vsext.vf8 v16, v8
11501 ; RV64V-NEXT: vsll.vi v8, v16, 3
11502 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
11503 ; RV64V-NEXT: vmv.v.v v8, v12
11506 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
11507 ; RV32ZVE32F: # %bb.0:
11508 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11509 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
11510 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
11511 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
11512 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
11513 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
11514 ; RV32ZVE32F-NEXT: andi a3, a2, 1
11515 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11516 ; RV32ZVE32F-NEXT: bnez a3, .LBB98_10
11517 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11518 ; RV32ZVE32F-NEXT: andi a1, a2, 2
11519 ; RV32ZVE32F-NEXT: bnez a1, .LBB98_11
11520 ; RV32ZVE32F-NEXT: .LBB98_2: # %else2
11521 ; RV32ZVE32F-NEXT: andi a1, a2, 4
11522 ; RV32ZVE32F-NEXT: bnez a1, .LBB98_12
11523 ; RV32ZVE32F-NEXT: .LBB98_3: # %else5
11524 ; RV32ZVE32F-NEXT: andi a1, a2, 8
11525 ; RV32ZVE32F-NEXT: bnez a1, .LBB98_13
11526 ; RV32ZVE32F-NEXT: .LBB98_4: # %else8
11527 ; RV32ZVE32F-NEXT: andi a1, a2, 16
11528 ; RV32ZVE32F-NEXT: bnez a1, .LBB98_14
11529 ; RV32ZVE32F-NEXT: .LBB98_5: # %else11
11530 ; RV32ZVE32F-NEXT: andi a1, a2, 32
11531 ; RV32ZVE32F-NEXT: bnez a1, .LBB98_15
11532 ; RV32ZVE32F-NEXT: .LBB98_6: # %else14
11533 ; RV32ZVE32F-NEXT: andi a1, a2, 64
11534 ; RV32ZVE32F-NEXT: bnez a1, .LBB98_16
11535 ; RV32ZVE32F-NEXT: .LBB98_7: # %else17
11536 ; RV32ZVE32F-NEXT: andi a1, a2, -128
11537 ; RV32ZVE32F-NEXT: beqz a1, .LBB98_9
11538 ; RV32ZVE32F-NEXT: .LBB98_8: # %cond.load19
11539 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11540 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11541 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11542 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11543 ; RV32ZVE32F-NEXT: .LBB98_9: # %else20
11544 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11545 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11546 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11547 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11548 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11549 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11550 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11551 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11552 ; RV32ZVE32F-NEXT: ret
11553 ; RV32ZVE32F-NEXT: .LBB98_10: # %cond.load
11554 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11555 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
11556 ; RV32ZVE32F-NEXT: andi a1, a2, 2
11557 ; RV32ZVE32F-NEXT: beqz a1, .LBB98_2
11558 ; RV32ZVE32F-NEXT: .LBB98_11: # %cond.load1
11559 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11560 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11561 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11562 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
11563 ; RV32ZVE32F-NEXT: andi a1, a2, 4
11564 ; RV32ZVE32F-NEXT: beqz a1, .LBB98_3
11565 ; RV32ZVE32F-NEXT: .LBB98_12: # %cond.load4
11566 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11567 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
11568 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11569 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
11570 ; RV32ZVE32F-NEXT: andi a1, a2, 8
11571 ; RV32ZVE32F-NEXT: beqz a1, .LBB98_4
11572 ; RV32ZVE32F-NEXT: .LBB98_13: # %cond.load7
11573 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11574 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
11575 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11576 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
11577 ; RV32ZVE32F-NEXT: andi a1, a2, 16
11578 ; RV32ZVE32F-NEXT: beqz a1, .LBB98_5
11579 ; RV32ZVE32F-NEXT: .LBB98_14: # %cond.load10
11580 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11581 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11582 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11583 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
11584 ; RV32ZVE32F-NEXT: andi a1, a2, 32
11585 ; RV32ZVE32F-NEXT: beqz a1, .LBB98_6
11586 ; RV32ZVE32F-NEXT: .LBB98_15: # %cond.load13
11587 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11588 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
11589 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11590 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
11591 ; RV32ZVE32F-NEXT: andi a1, a2, 64
11592 ; RV32ZVE32F-NEXT: beqz a1, .LBB98_7
11593 ; RV32ZVE32F-NEXT: .LBB98_16: # %cond.load16
11594 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11595 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
11596 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11597 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
11598 ; RV32ZVE32F-NEXT: andi a1, a2, -128
11599 ; RV32ZVE32F-NEXT: bnez a1, .LBB98_8
11600 ; RV32ZVE32F-NEXT: j .LBB98_9
11602 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
11603 ; RV64ZVE32F: # %bb.0:
11604 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11605 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
11606 ; RV64ZVE32F-NEXT: andi a3, a2, 1
11607 ; RV64ZVE32F-NEXT: beqz a3, .LBB98_2
11608 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
11609 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11610 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11611 ; RV64ZVE32F-NEXT: add a3, a1, a3
11612 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
11613 ; RV64ZVE32F-NEXT: .LBB98_2: # %else
11614 ; RV64ZVE32F-NEXT: andi a3, a2, 2
11615 ; RV64ZVE32F-NEXT: beqz a3, .LBB98_4
11616 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
11617 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11618 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
11619 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
11620 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11621 ; RV64ZVE32F-NEXT: add a3, a1, a3
11622 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
11623 ; RV64ZVE32F-NEXT: .LBB98_4: # %else2
11624 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
11625 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
11626 ; RV64ZVE32F-NEXT: andi a3, a2, 4
11627 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
11628 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
11629 ; RV64ZVE32F-NEXT: bnez a3, .LBB98_14
11630 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
11631 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11632 ; RV64ZVE32F-NEXT: bnez a3, .LBB98_15
11633 ; RV64ZVE32F-NEXT: .LBB98_6: # %else8
11634 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11635 ; RV64ZVE32F-NEXT: bnez a3, .LBB98_16
11636 ; RV64ZVE32F-NEXT: .LBB98_7: # %else11
11637 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11638 ; RV64ZVE32F-NEXT: beqz a3, .LBB98_9
11639 ; RV64ZVE32F-NEXT: .LBB98_8: # %cond.load13
11640 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
11641 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11642 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11643 ; RV64ZVE32F-NEXT: add a3, a1, a3
11644 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
11645 ; RV64ZVE32F-NEXT: .LBB98_9: # %else14
11646 ; RV64ZVE32F-NEXT: andi a3, a2, 64
11647 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
11648 ; RV64ZVE32F-NEXT: beqz a3, .LBB98_11
11649 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
11650 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11651 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11652 ; RV64ZVE32F-NEXT: add a3, a1, a3
11653 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
11654 ; RV64ZVE32F-NEXT: .LBB98_11: # %else17
11655 ; RV64ZVE32F-NEXT: andi a2, a2, -128
11656 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
11657 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
11658 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11659 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
11660 ; RV64ZVE32F-NEXT: slli a2, a2, 3
11661 ; RV64ZVE32F-NEXT: add a1, a1, a2
11662 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
11663 ; RV64ZVE32F-NEXT: .LBB98_13: # %else20
11664 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11665 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11666 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11667 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11668 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
11669 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
11670 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11671 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11672 ; RV64ZVE32F-NEXT: ret
11673 ; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load4
11674 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11675 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11676 ; RV64ZVE32F-NEXT: add a3, a1, a3
11677 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
11678 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11679 ; RV64ZVE32F-NEXT: beqz a3, .LBB98_6
11680 ; RV64ZVE32F-NEXT: .LBB98_15: # %cond.load7
11681 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11682 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11683 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11684 ; RV64ZVE32F-NEXT: add a3, a1, a3
11685 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
11686 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11687 ; RV64ZVE32F-NEXT: beqz a3, .LBB98_7
11688 ; RV64ZVE32F-NEXT: .LBB98_16: # %cond.load10
11689 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
11690 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11691 ; RV64ZVE32F-NEXT: add a3, a1, a3
11692 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11693 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11694 ; RV64ZVE32F-NEXT: bnez a3, .LBB98_8
11695 ; RV64ZVE32F-NEXT: j .LBB98_9
11696 %eidxs = sext <8 x i8> %idxs to <8 x i64>
11697 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
11698 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11699 ret <8 x double> %v
11700 }
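; i8 indices zero-extended to i64: the unsigned range fits in 16 bits, so the V runs can use vluxei16 with a vzext.vf2 index vector.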
11702 define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11703 ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
11705 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11706 ; RV32V-NEXT: vzext.vf2 v9, v8
11707 ; RV32V-NEXT: vsll.vi v8, v9, 3
11708 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11709 ; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t
11710 ; RV32V-NEXT: vmv.v.v v8, v12
11713 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
11715 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11716 ; RV64V-NEXT: vzext.vf2 v9, v8
11717 ; RV64V-NEXT: vsll.vi v8, v9, 3
11718 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11719 ; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t
11720 ; RV64V-NEXT: vmv.v.v v8, v12
11723 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
11724 ; RV32ZVE32F: # %bb.0:
11725 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11726 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
11727 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
11728 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
11729 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
11730 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
11731 ; RV32ZVE32F-NEXT: andi a3, a2, 1
11732 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11733 ; RV32ZVE32F-NEXT: bnez a3, .LBB99_10
11734 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11735 ; RV32ZVE32F-NEXT: andi a1, a2, 2
11736 ; RV32ZVE32F-NEXT: bnez a1, .LBB99_11
11737 ; RV32ZVE32F-NEXT: .LBB99_2: # %else2
11738 ; RV32ZVE32F-NEXT: andi a1, a2, 4
11739 ; RV32ZVE32F-NEXT: bnez a1, .LBB99_12
11740 ; RV32ZVE32F-NEXT: .LBB99_3: # %else5
11741 ; RV32ZVE32F-NEXT: andi a1, a2, 8
11742 ; RV32ZVE32F-NEXT: bnez a1, .LBB99_13
11743 ; RV32ZVE32F-NEXT: .LBB99_4: # %else8
11744 ; RV32ZVE32F-NEXT: andi a1, a2, 16
11745 ; RV32ZVE32F-NEXT: bnez a1, .LBB99_14
11746 ; RV32ZVE32F-NEXT: .LBB99_5: # %else11
11747 ; RV32ZVE32F-NEXT: andi a1, a2, 32
11748 ; RV32ZVE32F-NEXT: bnez a1, .LBB99_15
11749 ; RV32ZVE32F-NEXT: .LBB99_6: # %else14
11750 ; RV32ZVE32F-NEXT: andi a1, a2, 64
11751 ; RV32ZVE32F-NEXT: bnez a1, .LBB99_16
11752 ; RV32ZVE32F-NEXT: .LBB99_7: # %else17
11753 ; RV32ZVE32F-NEXT: andi a1, a2, -128
11754 ; RV32ZVE32F-NEXT: beqz a1, .LBB99_9
11755 ; RV32ZVE32F-NEXT: .LBB99_8: # %cond.load19
11756 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11757 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11758 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11759 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11760 ; RV32ZVE32F-NEXT: .LBB99_9: # %else20
11761 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11762 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11763 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11764 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11765 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11766 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11767 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11768 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11769 ; RV32ZVE32F-NEXT: ret
11770 ; RV32ZVE32F-NEXT: .LBB99_10: # %cond.load
11771 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11772 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
11773 ; RV32ZVE32F-NEXT: andi a1, a2, 2
11774 ; RV32ZVE32F-NEXT: beqz a1, .LBB99_2
11775 ; RV32ZVE32F-NEXT: .LBB99_11: # %cond.load1
11776 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11777 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11778 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11779 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
11780 ; RV32ZVE32F-NEXT: andi a1, a2, 4
11781 ; RV32ZVE32F-NEXT: beqz a1, .LBB99_3
11782 ; RV32ZVE32F-NEXT: .LBB99_12: # %cond.load4
11783 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11784 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
11785 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11786 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
11787 ; RV32ZVE32F-NEXT: andi a1, a2, 8
11788 ; RV32ZVE32F-NEXT: beqz a1, .LBB99_4
11789 ; RV32ZVE32F-NEXT: .LBB99_13: # %cond.load7
11790 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11791 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
11792 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11793 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
11794 ; RV32ZVE32F-NEXT: andi a1, a2, 16
11795 ; RV32ZVE32F-NEXT: beqz a1, .LBB99_5
11796 ; RV32ZVE32F-NEXT: .LBB99_14: # %cond.load10
11797 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11798 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11799 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11800 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
11801 ; RV32ZVE32F-NEXT: andi a1, a2, 32
11802 ; RV32ZVE32F-NEXT: beqz a1, .LBB99_6
11803 ; RV32ZVE32F-NEXT: .LBB99_15: # %cond.load13
11804 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11805 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
11806 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11807 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
11808 ; RV32ZVE32F-NEXT: andi a1, a2, 64
11809 ; RV32ZVE32F-NEXT: beqz a1, .LBB99_7
11810 ; RV32ZVE32F-NEXT: .LBB99_16: # %cond.load16
11811 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11812 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
11813 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
11814 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
11815 ; RV32ZVE32F-NEXT: andi a1, a2, -128
11816 ; RV32ZVE32F-NEXT: bnez a1, .LBB99_8
11817 ; RV32ZVE32F-NEXT: j .LBB99_9
11819 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
11820 ; RV64ZVE32F: # %bb.0:
11821 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11822 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
11823 ; RV64ZVE32F-NEXT: andi a3, a2, 1
11824 ; RV64ZVE32F-NEXT: beqz a3, .LBB99_2
11825 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
11826 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11827 ; RV64ZVE32F-NEXT: andi a3, a3, 255
11828 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11829 ; RV64ZVE32F-NEXT: add a3, a1, a3
11830 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
11831 ; RV64ZVE32F-NEXT: .LBB99_2: # %else
11832 ; RV64ZVE32F-NEXT: andi a3, a2, 2
11833 ; RV64ZVE32F-NEXT: beqz a3, .LBB99_4
11834 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
11835 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11836 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
11837 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
11838 ; RV64ZVE32F-NEXT: andi a3, a3, 255
11839 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11840 ; RV64ZVE32F-NEXT: add a3, a1, a3
11841 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
11842 ; RV64ZVE32F-NEXT: .LBB99_4: # %else2
11843 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
11844 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
11845 ; RV64ZVE32F-NEXT: andi a3, a2, 4
11846 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
11847 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
11848 ; RV64ZVE32F-NEXT: bnez a3, .LBB99_14
11849 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
11850 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11851 ; RV64ZVE32F-NEXT: bnez a3, .LBB99_15
11852 ; RV64ZVE32F-NEXT: .LBB99_6: # %else8
11853 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11854 ; RV64ZVE32F-NEXT: bnez a3, .LBB99_16
11855 ; RV64ZVE32F-NEXT: .LBB99_7: # %else11
11856 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11857 ; RV64ZVE32F-NEXT: beqz a3, .LBB99_9
11858 ; RV64ZVE32F-NEXT: .LBB99_8: # %cond.load13
11859 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
11860 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11861 ; RV64ZVE32F-NEXT: andi a3, a3, 255
11862 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11863 ; RV64ZVE32F-NEXT: add a3, a1, a3
11864 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
11865 ; RV64ZVE32F-NEXT: .LBB99_9: # %else14
11866 ; RV64ZVE32F-NEXT: andi a3, a2, 64
11867 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
11868 ; RV64ZVE32F-NEXT: beqz a3, .LBB99_11
11869 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
11870 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11871 ; RV64ZVE32F-NEXT: andi a3, a3, 255
11872 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11873 ; RV64ZVE32F-NEXT: add a3, a1, a3
11874 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
11875 ; RV64ZVE32F-NEXT: .LBB99_11: # %else17
11876 ; RV64ZVE32F-NEXT: andi a2, a2, -128
11877 ; RV64ZVE32F-NEXT: beqz a2, .LBB99_13
11878 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
11879 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11880 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
11881 ; RV64ZVE32F-NEXT: andi a2, a2, 255
11882 ; RV64ZVE32F-NEXT: slli a2, a2, 3
11883 ; RV64ZVE32F-NEXT: add a1, a1, a2
11884 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
11885 ; RV64ZVE32F-NEXT: .LBB99_13: # %else20
11886 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11887 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11888 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11889 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11890 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
11891 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
11892 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11893 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11894 ; RV64ZVE32F-NEXT: ret
11895 ; RV64ZVE32F-NEXT: .LBB99_14: # %cond.load4
11896 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11897 ; RV64ZVE32F-NEXT: andi a3, a3, 255
11898 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11899 ; RV64ZVE32F-NEXT: add a3, a1, a3
11900 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
11901 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11902 ; RV64ZVE32F-NEXT: beqz a3, .LBB99_6
11903 ; RV64ZVE32F-NEXT: .LBB99_15: # %cond.load7
11904 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11905 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11906 ; RV64ZVE32F-NEXT: andi a3, a3, 255
11907 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11908 ; RV64ZVE32F-NEXT: add a3, a1, a3
11909 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
11910 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11911 ; RV64ZVE32F-NEXT: beqz a3, .LBB99_7
11912 ; RV64ZVE32F-NEXT: .LBB99_16: # %cond.load10
11913 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
11914 ; RV64ZVE32F-NEXT: andi a3, a3, 255
11915 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11916 ; RV64ZVE32F-NEXT: add a3, a1, a3
11917 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11918 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11919 ; RV64ZVE32F-NEXT: bnez a3, .LBB99_8
11920 ; RV64ZVE32F-NEXT: j .LBB99_9
11921 %eidxs = zext <8 x i8> %idxs to <8 x i64>
11922 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
11923 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11924 ret <8 x double> %v
11925 }
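; <8 x double> gather addressed as base + i16 indices.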
11927 define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11928 ; RV32V-LABEL: mgather_baseidx_v8i16_v8f64:
11930 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11931 ; RV32V-NEXT: vsext.vf2 v10, v8
11932 ; RV32V-NEXT: vsll.vi v8, v10, 3
11933 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11934 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11935 ; RV32V-NEXT: vmv.v.v v8, v12
11938 ; RV64V-LABEL: mgather_baseidx_v8i16_v8f64:
11940 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11941 ; RV64V-NEXT: vsext.vf4 v16, v8
11942 ; RV64V-NEXT: vsll.vi v8, v16, 3
11943 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
11944 ; RV64V-NEXT: vmv.v.v v8, v12
11947 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
11948 ; RV32ZVE32F: # %bb.0:
11949 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11950 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
11951 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
11952 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
11953 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
11954 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
11955 ; RV32ZVE32F-NEXT: andi a3, a2, 1
11956 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11957 ; RV32ZVE32F-NEXT: bnez a3, .LBB100_10
11958 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11959 ; RV32ZVE32F-NEXT: andi a1, a2, 2
11960 ; RV32ZVE32F-NEXT: bnez a1, .LBB100_11
11961 ; RV32ZVE32F-NEXT: .LBB100_2: # %else2
11962 ; RV32ZVE32F-NEXT: andi a1, a2, 4
11963 ; RV32ZVE32F-NEXT: bnez a1, .LBB100_12
11964 ; RV32ZVE32F-NEXT: .LBB100_3: # %else5
11965 ; RV32ZVE32F-NEXT: andi a1, a2, 8
11966 ; RV32ZVE32F-NEXT: bnez a1, .LBB100_13
11967 ; RV32ZVE32F-NEXT: .LBB100_4: # %else8
11968 ; RV32ZVE32F-NEXT: andi a1, a2, 16
11969 ; RV32ZVE32F-NEXT: bnez a1, .LBB100_14
11970 ; RV32ZVE32F-NEXT: .LBB100_5: # %else11
11971 ; RV32ZVE32F-NEXT: andi a1, a2, 32
11972 ; RV32ZVE32F-NEXT: bnez a1, .LBB100_15
11973 ; RV32ZVE32F-NEXT: .LBB100_6: # %else14
11974 ; RV32ZVE32F-NEXT: andi a1, a2, 64
11975 ; RV32ZVE32F-NEXT: bnez a1, .LBB100_16
11976 ; RV32ZVE32F-NEXT: .LBB100_7: # %else17
11977 ; RV32ZVE32F-NEXT: andi a1, a2, -128
11978 ; RV32ZVE32F-NEXT: beqz a1, .LBB100_9
11979 ; RV32ZVE32F-NEXT: .LBB100_8: # %cond.load19
11980 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11981 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11982 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11983 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11984 ; RV32ZVE32F-NEXT: .LBB100_9: # %else20
11985 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11986 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11987 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11988 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11989 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11990 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11991 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11992 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11993 ; RV32ZVE32F-NEXT: ret
11994 ; RV32ZVE32F-NEXT: .LBB100_10: # %cond.load
11995 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11996 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
11997 ; RV32ZVE32F-NEXT: andi a1, a2, 2
11998 ; RV32ZVE32F-NEXT: beqz a1, .LBB100_2
11999 ; RV32ZVE32F-NEXT: .LBB100_11: # %cond.load1
12000 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12001 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12002 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12003 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
12004 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12005 ; RV32ZVE32F-NEXT: beqz a1, .LBB100_3
12006 ; RV32ZVE32F-NEXT: .LBB100_12: # %cond.load4
12007 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12008 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
12009 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12010 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
12011 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12012 ; RV32ZVE32F-NEXT: beqz a1, .LBB100_4
12013 ; RV32ZVE32F-NEXT: .LBB100_13: # %cond.load7
12014 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12015 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
12016 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12017 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
12018 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12019 ; RV32ZVE32F-NEXT: beqz a1, .LBB100_5
12020 ; RV32ZVE32F-NEXT: .LBB100_14: # %cond.load10
12021 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12022 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12023 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12024 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
12025 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12026 ; RV32ZVE32F-NEXT: beqz a1, .LBB100_6
12027 ; RV32ZVE32F-NEXT: .LBB100_15: # %cond.load13
12028 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12029 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
12030 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12031 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
12032 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12033 ; RV32ZVE32F-NEXT: beqz a1, .LBB100_7
12034 ; RV32ZVE32F-NEXT: .LBB100_16: # %cond.load16
12035 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12036 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
12037 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12038 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
12039 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12040 ; RV32ZVE32F-NEXT: bnez a1, .LBB100_8
12041 ; RV32ZVE32F-NEXT: j .LBB100_9
12043 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
12044 ; RV64ZVE32F: # %bb.0:
12045 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
12046 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
12047 ; RV64ZVE32F-NEXT: andi a3, a2, 1
12048 ; RV64ZVE32F-NEXT: beqz a3, .LBB100_2
12049 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
12050 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
12051 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12052 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12053 ; RV64ZVE32F-NEXT: add a3, a1, a3
12054 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
12055 ; RV64ZVE32F-NEXT: .LBB100_2: # %else
12056 ; RV64ZVE32F-NEXT: andi a3, a2, 2
12057 ; RV64ZVE32F-NEXT: beqz a3, .LBB100_4
12058 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
12059 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
12060 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
12061 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
12062 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12063 ; RV64ZVE32F-NEXT: add a3, a1, a3
12064 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
12065 ; RV64ZVE32F-NEXT: .LBB100_4: # %else2
12066 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
12067 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
12068 ; RV64ZVE32F-NEXT: andi a3, a2, 4
12069 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
12070 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
12071 ; RV64ZVE32F-NEXT: bnez a3, .LBB100_14
12072 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
12073 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12074 ; RV64ZVE32F-NEXT: bnez a3, .LBB100_15
12075 ; RV64ZVE32F-NEXT: .LBB100_6: # %else8
12076 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12077 ; RV64ZVE32F-NEXT: bnez a3, .LBB100_16
12078 ; RV64ZVE32F-NEXT: .LBB100_7: # %else11
12079 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12080 ; RV64ZVE32F-NEXT: beqz a3, .LBB100_9
12081 ; RV64ZVE32F-NEXT: .LBB100_8: # %cond.load13
12082 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
12083 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12084 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12085 ; RV64ZVE32F-NEXT: add a3, a1, a3
12086 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
12087 ; RV64ZVE32F-NEXT: .LBB100_9: # %else14
12088 ; RV64ZVE32F-NEXT: andi a3, a2, 64
12089 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
12090 ; RV64ZVE32F-NEXT: beqz a3, .LBB100_11
12091 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
12092 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12093 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12094 ; RV64ZVE32F-NEXT: add a3, a1, a3
12095 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
12096 ; RV64ZVE32F-NEXT: .LBB100_11: # %else17
12097 ; RV64ZVE32F-NEXT: andi a2, a2, -128
12098 ; RV64ZVE32F-NEXT: beqz a2, .LBB100_13
12099 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
12100 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12101 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12102 ; RV64ZVE32F-NEXT: slli a2, a2, 3
12103 ; RV64ZVE32F-NEXT: add a1, a1, a2
12104 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
12105 ; RV64ZVE32F-NEXT: .LBB100_13: # %else20
12106 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
12107 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
12108 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
12109 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
12110 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
12111 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
12112 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
12113 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
12114 ; RV64ZVE32F-NEXT: ret
12115 ; RV64ZVE32F-NEXT: .LBB100_14: # %cond.load4
12116 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12117 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12118 ; RV64ZVE32F-NEXT: add a3, a1, a3
12119 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
12120 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12121 ; RV64ZVE32F-NEXT: beqz a3, .LBB100_6
12122 ; RV64ZVE32F-NEXT: .LBB100_15: # %cond.load7
12123 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12124 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12125 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12126 ; RV64ZVE32F-NEXT: add a3, a1, a3
12127 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
12128 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12129 ; RV64ZVE32F-NEXT: beqz a3, .LBB100_7
12130 ; RV64ZVE32F-NEXT: .LBB100_16: # %cond.load10
12131 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
12132 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12133 ; RV64ZVE32F-NEXT: add a3, a1, a3
12134 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
12135 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12136 ; RV64ZVE32F-NEXT: bnez a3, .LBB100_8
12137 ; RV64ZVE32F-NEXT: j .LBB100_9
12138 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
12139 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12140 ret <8 x double> %v
12141 }
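; i16 indices explicitly sign-extended to i64 before the GEP.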
12143 define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
12144 ; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
12146 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12147 ; RV32V-NEXT: vsext.vf2 v10, v8
12148 ; RV32V-NEXT: vsll.vi v8, v10, 3
12149 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
12150 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
12151 ; RV32V-NEXT: vmv.v.v v8, v12
12154 ; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
12156 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
12157 ; RV64V-NEXT: vsext.vf4 v16, v8
12158 ; RV64V-NEXT: vsll.vi v8, v16, 3
12159 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
12160 ; RV64V-NEXT: vmv.v.v v8, v12
12163 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
12164 ; RV32ZVE32F: # %bb.0:
12165 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12166 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
12167 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
12168 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
12169 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
12170 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
12171 ; RV32ZVE32F-NEXT: andi a3, a2, 1
12172 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
12173 ; RV32ZVE32F-NEXT: bnez a3, .LBB101_10
12174 ; RV32ZVE32F-NEXT: # %bb.1: # %else
12175 ; RV32ZVE32F-NEXT: andi a1, a2, 2
12176 ; RV32ZVE32F-NEXT: bnez a1, .LBB101_11
12177 ; RV32ZVE32F-NEXT: .LBB101_2: # %else2
12178 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12179 ; RV32ZVE32F-NEXT: bnez a1, .LBB101_12
12180 ; RV32ZVE32F-NEXT: .LBB101_3: # %else5
12181 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12182 ; RV32ZVE32F-NEXT: bnez a1, .LBB101_13
12183 ; RV32ZVE32F-NEXT: .LBB101_4: # %else8
12184 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12185 ; RV32ZVE32F-NEXT: bnez a1, .LBB101_14
12186 ; RV32ZVE32F-NEXT: .LBB101_5: # %else11
12187 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12188 ; RV32ZVE32F-NEXT: bnez a1, .LBB101_15
12189 ; RV32ZVE32F-NEXT: .LBB101_6: # %else14
12190 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12191 ; RV32ZVE32F-NEXT: bnez a1, .LBB101_16
12192 ; RV32ZVE32F-NEXT: .LBB101_7: # %else17
12193 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12194 ; RV32ZVE32F-NEXT: beqz a1, .LBB101_9
12195 ; RV32ZVE32F-NEXT: .LBB101_8: # %cond.load19
12196 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12197 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
12198 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
12199 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
12200 ; RV32ZVE32F-NEXT: .LBB101_9: # %else20
12201 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
12202 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
12203 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
12204 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
12205 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
12206 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
12207 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
12208 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
12209 ; RV32ZVE32F-NEXT: ret
12210 ; RV32ZVE32F-NEXT: .LBB101_10: # %cond.load
12211 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
12212 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
12213 ; RV32ZVE32F-NEXT: andi a1, a2, 2
12214 ; RV32ZVE32F-NEXT: beqz a1, .LBB101_2
12215 ; RV32ZVE32F-NEXT: .LBB101_11: # %cond.load1
12216 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12217 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12218 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12219 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
12220 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12221 ; RV32ZVE32F-NEXT: beqz a1, .LBB101_3
12222 ; RV32ZVE32F-NEXT: .LBB101_12: # %cond.load4
12223 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12224 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
12225 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12226 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
12227 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12228 ; RV32ZVE32F-NEXT: beqz a1, .LBB101_4
12229 ; RV32ZVE32F-NEXT: .LBB101_13: # %cond.load7
12230 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12231 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
12232 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12233 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
12234 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12235 ; RV32ZVE32F-NEXT: beqz a1, .LBB101_5
12236 ; RV32ZVE32F-NEXT: .LBB101_14: # %cond.load10
12237 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12238 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12239 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12240 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
12241 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12242 ; RV32ZVE32F-NEXT: beqz a1, .LBB101_6
12243 ; RV32ZVE32F-NEXT: .LBB101_15: # %cond.load13
12244 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12245 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
12246 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12247 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
12248 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12249 ; RV32ZVE32F-NEXT: beqz a1, .LBB101_7
12250 ; RV32ZVE32F-NEXT: .LBB101_16: # %cond.load16
12251 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12252 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
12253 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12254 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
12255 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12256 ; RV32ZVE32F-NEXT: bnez a1, .LBB101_8
12257 ; RV32ZVE32F-NEXT: j .LBB101_9
12259 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
12260 ; RV64ZVE32F: # %bb.0:
12261 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
12262 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
12263 ; RV64ZVE32F-NEXT: andi a3, a2, 1
12264 ; RV64ZVE32F-NEXT: beqz a3, .LBB101_2
12265 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
12266 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
12267 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12268 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12269 ; RV64ZVE32F-NEXT: add a3, a1, a3
12270 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
12271 ; RV64ZVE32F-NEXT: .LBB101_2: # %else
12272 ; RV64ZVE32F-NEXT: andi a3, a2, 2
12273 ; RV64ZVE32F-NEXT: beqz a3, .LBB101_4
12274 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
12275 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
12276 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
12277 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
12278 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12279 ; RV64ZVE32F-NEXT: add a3, a1, a3
12280 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
12281 ; RV64ZVE32F-NEXT: .LBB101_4: # %else2
12282 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
12283 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
12284 ; RV64ZVE32F-NEXT: andi a3, a2, 4
12285 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
12286 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
12287 ; RV64ZVE32F-NEXT: bnez a3, .LBB101_14
12288 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
12289 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12290 ; RV64ZVE32F-NEXT: bnez a3, .LBB101_15
12291 ; RV64ZVE32F-NEXT: .LBB101_6: # %else8
12292 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12293 ; RV64ZVE32F-NEXT: bnez a3, .LBB101_16
12294 ; RV64ZVE32F-NEXT: .LBB101_7: # %else11
12295 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12296 ; RV64ZVE32F-NEXT: beqz a3, .LBB101_9
12297 ; RV64ZVE32F-NEXT: .LBB101_8: # %cond.load13
12298 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
12299 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12300 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12301 ; RV64ZVE32F-NEXT: add a3, a1, a3
12302 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
12303 ; RV64ZVE32F-NEXT: .LBB101_9: # %else14
12304 ; RV64ZVE32F-NEXT: andi a3, a2, 64
12305 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
12306 ; RV64ZVE32F-NEXT: beqz a3, .LBB101_11
12307 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
12308 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12309 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12310 ; RV64ZVE32F-NEXT: add a3, a1, a3
12311 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
12312 ; RV64ZVE32F-NEXT: .LBB101_11: # %else17
12313 ; RV64ZVE32F-NEXT: andi a2, a2, -128
12314 ; RV64ZVE32F-NEXT: beqz a2, .LBB101_13
12315 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
12316 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12317 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12318 ; RV64ZVE32F-NEXT: slli a2, a2, 3
12319 ; RV64ZVE32F-NEXT: add a1, a1, a2
12320 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
12321 ; RV64ZVE32F-NEXT: .LBB101_13: # %else20
12322 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
12323 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
12324 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
12325 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
12326 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
12327 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
12328 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
12329 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
12330 ; RV64ZVE32F-NEXT: ret
12331 ; RV64ZVE32F-NEXT: .LBB101_14: # %cond.load4
12332 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12333 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12334 ; RV64ZVE32F-NEXT: add a3, a1, a3
12335 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
12336 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12337 ; RV64ZVE32F-NEXT: beqz a3, .LBB101_6
12338 ; RV64ZVE32F-NEXT: .LBB101_15: # %cond.load7
12339 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12340 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12341 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12342 ; RV64ZVE32F-NEXT: add a3, a1, a3
12343 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
12344 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12345 ; RV64ZVE32F-NEXT: beqz a3, .LBB101_7
12346 ; RV64ZVE32F-NEXT: .LBB101_16: # %cond.load10
12347 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
12348 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12349 ; RV64ZVE32F-NEXT: add a3, a1, a3
12350 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
12351 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12352 ; RV64ZVE32F-NEXT: bnez a3, .LBB101_8
12353 ; RV64ZVE32F-NEXT: j .LBB101_9
12354 %eidxs = sext <8 x i16> %idxs to <8 x i64>
12355 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
12356 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12357 ret <8 x double> %v
12358 }
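; The zext_v8i16 variant below is the unsigned-index counterpart. The
; RV64ZVE32F scalar path computes (zext i16 -> i64) * 8 with the shift pair
; slli 48 / srli 45, since (x << 48) >> 45 == (x & 0xffff) << 3.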
12360 define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
12361 ; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
12363 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12364 ; RV32V-NEXT: vzext.vf2 v10, v8
12365 ; RV32V-NEXT: vsll.vi v8, v10, 3
12366 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
12367 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
12368 ; RV32V-NEXT: vmv.v.v v8, v12
12371 ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
12373 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12374 ; RV64V-NEXT: vzext.vf2 v10, v8
12375 ; RV64V-NEXT: vsll.vi v8, v10, 3
12376 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
12377 ; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t
12378 ; RV64V-NEXT: vmv.v.v v8, v12
12381 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
12382 ; RV32ZVE32F: # %bb.0:
12383 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12384 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
12385 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
12386 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
12387 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
12388 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
12389 ; RV32ZVE32F-NEXT: andi a3, a2, 1
12390 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
12391 ; RV32ZVE32F-NEXT: bnez a3, .LBB102_10
12392 ; RV32ZVE32F-NEXT: # %bb.1: # %else
12393 ; RV32ZVE32F-NEXT: andi a1, a2, 2
12394 ; RV32ZVE32F-NEXT: bnez a1, .LBB102_11
12395 ; RV32ZVE32F-NEXT: .LBB102_2: # %else2
12396 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12397 ; RV32ZVE32F-NEXT: bnez a1, .LBB102_12
12398 ; RV32ZVE32F-NEXT: .LBB102_3: # %else5
12399 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12400 ; RV32ZVE32F-NEXT: bnez a1, .LBB102_13
12401 ; RV32ZVE32F-NEXT: .LBB102_4: # %else8
12402 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12403 ; RV32ZVE32F-NEXT: bnez a1, .LBB102_14
12404 ; RV32ZVE32F-NEXT: .LBB102_5: # %else11
12405 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12406 ; RV32ZVE32F-NEXT: bnez a1, .LBB102_15
12407 ; RV32ZVE32F-NEXT: .LBB102_6: # %else14
12408 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12409 ; RV32ZVE32F-NEXT: bnez a1, .LBB102_16
12410 ; RV32ZVE32F-NEXT: .LBB102_7: # %else17
12411 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12412 ; RV32ZVE32F-NEXT: beqz a1, .LBB102_9
12413 ; RV32ZVE32F-NEXT: .LBB102_8: # %cond.load19
12414 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12415 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
12416 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
12417 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
12418 ; RV32ZVE32F-NEXT: .LBB102_9: # %else20
12419 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
12420 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
12421 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
12422 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
12423 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
12424 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
12425 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
12426 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
12427 ; RV32ZVE32F-NEXT: ret
12428 ; RV32ZVE32F-NEXT: .LBB102_10: # %cond.load
12429 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
12430 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
12431 ; RV32ZVE32F-NEXT: andi a1, a2, 2
12432 ; RV32ZVE32F-NEXT: beqz a1, .LBB102_2
12433 ; RV32ZVE32F-NEXT: .LBB102_11: # %cond.load1
12434 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12435 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12436 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12437 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
12438 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12439 ; RV32ZVE32F-NEXT: beqz a1, .LBB102_3
12440 ; RV32ZVE32F-NEXT: .LBB102_12: # %cond.load4
12441 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12442 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
12443 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12444 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
12445 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12446 ; RV32ZVE32F-NEXT: beqz a1, .LBB102_4
12447 ; RV32ZVE32F-NEXT: .LBB102_13: # %cond.load7
12448 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12449 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
12450 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12451 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
12452 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12453 ; RV32ZVE32F-NEXT: beqz a1, .LBB102_5
12454 ; RV32ZVE32F-NEXT: .LBB102_14: # %cond.load10
12455 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12456 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12457 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12458 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
12459 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12460 ; RV32ZVE32F-NEXT: beqz a1, .LBB102_6
12461 ; RV32ZVE32F-NEXT: .LBB102_15: # %cond.load13
12462 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12463 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
12464 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12465 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
12466 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12467 ; RV32ZVE32F-NEXT: beqz a1, .LBB102_7
12468 ; RV32ZVE32F-NEXT: .LBB102_16: # %cond.load16
12469 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12470 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
12471 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12472 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
12473 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12474 ; RV32ZVE32F-NEXT: bnez a1, .LBB102_8
12475 ; RV32ZVE32F-NEXT: j .LBB102_9
12477 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
12478 ; RV64ZVE32F: # %bb.0:
12479 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
12480 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
12481 ; RV64ZVE32F-NEXT: andi a3, a2, 1
12482 ; RV64ZVE32F-NEXT: beqz a3, .LBB102_2
12483 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
12484 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
12485 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12486 ; RV64ZVE32F-NEXT: slli a3, a3, 48
12487 ; RV64ZVE32F-NEXT: srli a3, a3, 45
12488 ; RV64ZVE32F-NEXT: add a3, a1, a3
12489 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
12490 ; RV64ZVE32F-NEXT: .LBB102_2: # %else
12491 ; RV64ZVE32F-NEXT: andi a3, a2, 2
12492 ; RV64ZVE32F-NEXT: beqz a3, .LBB102_4
12493 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
12494 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
12495 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
12496 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
12497 ; RV64ZVE32F-NEXT: slli a3, a3, 48
12498 ; RV64ZVE32F-NEXT: srli a3, a3, 45
12499 ; RV64ZVE32F-NEXT: add a3, a1, a3
12500 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
12501 ; RV64ZVE32F-NEXT: .LBB102_4: # %else2
12502 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
12503 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
12504 ; RV64ZVE32F-NEXT: andi a3, a2, 4
12505 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
12506 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
12507 ; RV64ZVE32F-NEXT: bnez a3, .LBB102_14
12508 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
12509 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12510 ; RV64ZVE32F-NEXT: bnez a3, .LBB102_15
12511 ; RV64ZVE32F-NEXT: .LBB102_6: # %else8
12512 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12513 ; RV64ZVE32F-NEXT: bnez a3, .LBB102_16
12514 ; RV64ZVE32F-NEXT: .LBB102_7: # %else11
12515 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12516 ; RV64ZVE32F-NEXT: beqz a3, .LBB102_9
12517 ; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load13
12518 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
12519 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12520 ; RV64ZVE32F-NEXT: slli a3, a3, 48
12521 ; RV64ZVE32F-NEXT: srli a3, a3, 45
12522 ; RV64ZVE32F-NEXT: add a3, a1, a3
12523 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
12524 ; RV64ZVE32F-NEXT: .LBB102_9: # %else14
12525 ; RV64ZVE32F-NEXT: andi a3, a2, 64
12526 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
12527 ; RV64ZVE32F-NEXT: beqz a3, .LBB102_11
12528 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
12529 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12530 ; RV64ZVE32F-NEXT: slli a3, a3, 48
12531 ; RV64ZVE32F-NEXT: srli a3, a3, 45
12532 ; RV64ZVE32F-NEXT: add a3, a1, a3
12533 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
12534 ; RV64ZVE32F-NEXT: .LBB102_11: # %else17
12535 ; RV64ZVE32F-NEXT: andi a2, a2, -128
12536 ; RV64ZVE32F-NEXT: beqz a2, .LBB102_13
12537 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
12538 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12539 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12540 ; RV64ZVE32F-NEXT: slli a2, a2, 48
12541 ; RV64ZVE32F-NEXT: srli a2, a2, 45
12542 ; RV64ZVE32F-NEXT: add a1, a1, a2
12543 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
12544 ; RV64ZVE32F-NEXT: .LBB102_13: # %else20
12545 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
12546 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
12547 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
12548 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
12549 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
12550 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
12551 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
12552 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
12553 ; RV64ZVE32F-NEXT: ret
12554 ; RV64ZVE32F-NEXT: .LBB102_14: # %cond.load4
12555 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12556 ; RV64ZVE32F-NEXT: slli a3, a3, 48
12557 ; RV64ZVE32F-NEXT: srli a3, a3, 45
12558 ; RV64ZVE32F-NEXT: add a3, a1, a3
12559 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
12560 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12561 ; RV64ZVE32F-NEXT: beqz a3, .LBB102_6
12562 ; RV64ZVE32F-NEXT: .LBB102_15: # %cond.load7
12563 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12564 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12565 ; RV64ZVE32F-NEXT: slli a3, a3, 48
12566 ; RV64ZVE32F-NEXT: srli a3, a3, 45
12567 ; RV64ZVE32F-NEXT: add a3, a1, a3
12568 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
12569 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12570 ; RV64ZVE32F-NEXT: beqz a3, .LBB102_7
12571 ; RV64ZVE32F-NEXT: .LBB102_16: # %cond.load10
12572 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
12573 ; RV64ZVE32F-NEXT: slli a3, a3, 48
12574 ; RV64ZVE32F-NEXT: srli a3, a3, 45
12575 ; RV64ZVE32F-NEXT: add a3, a1, a3
12576 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
12577 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12578 ; RV64ZVE32F-NEXT: bnez a3, .LBB102_8
12579 ; RV64ZVE32F-NEXT: j .LBB102_9
12580 %eidxs = zext <8 x i16> %idxs to <8 x i64>
12581 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
12582 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12583 ret <8 x double> %v
12584 }
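; The next test indexes directly with <8 x i32>. GEP indices are sign-extended
; to pointer width, so the RV32 configurations can scale the i32 offsets as-is
; (vsll.vi by 3), while RV64V first widens them with vsext.vf2.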
12586 define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
12587 ; RV32V-LABEL: mgather_baseidx_v8i32_v8f64:
12589 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12590 ; RV32V-NEXT: vsll.vi v8, v8, 3
12591 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
12592 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
12593 ; RV32V-NEXT: vmv.v.v v8, v12
12596 ; RV64V-LABEL: mgather_baseidx_v8i32_v8f64:
12598 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
12599 ; RV64V-NEXT: vsext.vf2 v16, v8
12600 ; RV64V-NEXT: vsll.vi v8, v16, 3
12601 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
12602 ; RV64V-NEXT: vmv.v.v v8, v12
12605 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
12606 ; RV32ZVE32F: # %bb.0:
12607 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12608 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
12609 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
12610 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
12611 ; RV32ZVE32F-NEXT: andi a3, a2, 1
12612 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
12613 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
12614 ; RV32ZVE32F-NEXT: bnez a3, .LBB103_10
12615 ; RV32ZVE32F-NEXT: # %bb.1: # %else
12616 ; RV32ZVE32F-NEXT: andi a1, a2, 2
12617 ; RV32ZVE32F-NEXT: bnez a1, .LBB103_11
12618 ; RV32ZVE32F-NEXT: .LBB103_2: # %else2
12619 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12620 ; RV32ZVE32F-NEXT: bnez a1, .LBB103_12
12621 ; RV32ZVE32F-NEXT: .LBB103_3: # %else5
12622 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12623 ; RV32ZVE32F-NEXT: bnez a1, .LBB103_13
12624 ; RV32ZVE32F-NEXT: .LBB103_4: # %else8
12625 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12626 ; RV32ZVE32F-NEXT: bnez a1, .LBB103_14
12627 ; RV32ZVE32F-NEXT: .LBB103_5: # %else11
12628 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12629 ; RV32ZVE32F-NEXT: bnez a1, .LBB103_15
12630 ; RV32ZVE32F-NEXT: .LBB103_6: # %else14
12631 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12632 ; RV32ZVE32F-NEXT: bnez a1, .LBB103_16
12633 ; RV32ZVE32F-NEXT: .LBB103_7: # %else17
12634 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12635 ; RV32ZVE32F-NEXT: beqz a1, .LBB103_9
12636 ; RV32ZVE32F-NEXT: .LBB103_8: # %cond.load19
12637 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12638 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
12639 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
12640 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
12641 ; RV32ZVE32F-NEXT: .LBB103_9: # %else20
12642 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
12643 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
12644 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
12645 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
12646 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
12647 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
12648 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
12649 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
12650 ; RV32ZVE32F-NEXT: ret
12651 ; RV32ZVE32F-NEXT: .LBB103_10: # %cond.load
12652 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
12653 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
12654 ; RV32ZVE32F-NEXT: andi a1, a2, 2
12655 ; RV32ZVE32F-NEXT: beqz a1, .LBB103_2
12656 ; RV32ZVE32F-NEXT: .LBB103_11: # %cond.load1
12657 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12658 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12659 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12660 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
12661 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12662 ; RV32ZVE32F-NEXT: beqz a1, .LBB103_3
12663 ; RV32ZVE32F-NEXT: .LBB103_12: # %cond.load4
12664 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12665 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
12666 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12667 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
12668 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12669 ; RV32ZVE32F-NEXT: beqz a1, .LBB103_4
12670 ; RV32ZVE32F-NEXT: .LBB103_13: # %cond.load7
12671 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12672 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
12673 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12674 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
12675 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12676 ; RV32ZVE32F-NEXT: beqz a1, .LBB103_5
12677 ; RV32ZVE32F-NEXT: .LBB103_14: # %cond.load10
12678 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12679 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12680 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12681 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
12682 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12683 ; RV32ZVE32F-NEXT: beqz a1, .LBB103_6
12684 ; RV32ZVE32F-NEXT: .LBB103_15: # %cond.load13
12685 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12686 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
12687 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12688 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
12689 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12690 ; RV32ZVE32F-NEXT: beqz a1, .LBB103_7
12691 ; RV32ZVE32F-NEXT: .LBB103_16: # %cond.load16
12692 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12693 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
12694 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12695 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
12696 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12697 ; RV32ZVE32F-NEXT: bnez a1, .LBB103_8
12698 ; RV32ZVE32F-NEXT: j .LBB103_9
12700 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
12701 ; RV64ZVE32F: # %bb.0:
12702 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
12703 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
12704 ; RV64ZVE32F-NEXT: andi a3, a2, 1
12705 ; RV64ZVE32F-NEXT: beqz a3, .LBB103_2
12706 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
12707 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
12708 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12709 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12710 ; RV64ZVE32F-NEXT: add a3, a1, a3
12711 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
12712 ; RV64ZVE32F-NEXT: .LBB103_2: # %else
12713 ; RV64ZVE32F-NEXT: andi a3, a2, 2
12714 ; RV64ZVE32F-NEXT: beqz a3, .LBB103_4
12715 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
12716 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12717 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12718 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
12719 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12720 ; RV64ZVE32F-NEXT: add a3, a1, a3
12721 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
12722 ; RV64ZVE32F-NEXT: .LBB103_4: # %else2
12723 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
12724 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12725 ; RV64ZVE32F-NEXT: andi a3, a2, 4
12726 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
12727 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
12728 ; RV64ZVE32F-NEXT: bnez a3, .LBB103_14
12729 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
12730 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12731 ; RV64ZVE32F-NEXT: bnez a3, .LBB103_15
12732 ; RV64ZVE32F-NEXT: .LBB103_6: # %else8
12733 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12734 ; RV64ZVE32F-NEXT: bnez a3, .LBB103_16
12735 ; RV64ZVE32F-NEXT: .LBB103_7: # %else11
12736 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12737 ; RV64ZVE32F-NEXT: beqz a3, .LBB103_9
12738 ; RV64ZVE32F-NEXT: .LBB103_8: # %cond.load13
12739 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
12740 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12741 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12742 ; RV64ZVE32F-NEXT: add a3, a1, a3
12743 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
12744 ; RV64ZVE32F-NEXT: .LBB103_9: # %else14
12745 ; RV64ZVE32F-NEXT: andi a3, a2, 64
12746 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
12747 ; RV64ZVE32F-NEXT: beqz a3, .LBB103_11
12748 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
12749 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12750 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12751 ; RV64ZVE32F-NEXT: add a3, a1, a3
12752 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
12753 ; RV64ZVE32F-NEXT: .LBB103_11: # %else17
12754 ; RV64ZVE32F-NEXT: andi a2, a2, -128
12755 ; RV64ZVE32F-NEXT: beqz a2, .LBB103_13
12756 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
12757 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12758 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12759 ; RV64ZVE32F-NEXT: slli a2, a2, 3
12760 ; RV64ZVE32F-NEXT: add a1, a1, a2
12761 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
12762 ; RV64ZVE32F-NEXT: .LBB103_13: # %else20
12763 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
12764 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
12765 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
12766 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
12767 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
12768 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
12769 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
12770 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
12771 ; RV64ZVE32F-NEXT: ret
12772 ; RV64ZVE32F-NEXT: .LBB103_14: # %cond.load4
12773 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12774 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12775 ; RV64ZVE32F-NEXT: add a3, a1, a3
12776 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
12777 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12778 ; RV64ZVE32F-NEXT: beqz a3, .LBB103_6
12779 ; RV64ZVE32F-NEXT: .LBB103_15: # %cond.load7
12780 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12781 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12782 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12783 ; RV64ZVE32F-NEXT: add a3, a1, a3
12784 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
12785 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12786 ; RV64ZVE32F-NEXT: beqz a3, .LBB103_7
12787 ; RV64ZVE32F-NEXT: .LBB103_16: # %cond.load10
12788 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
12789 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12790 ; RV64ZVE32F-NEXT: add a3, a1, a3
12791 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
12792 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12793 ; RV64ZVE32F-NEXT: bnez a3, .LBB103_8
12794 ; RV64ZVE32F-NEXT: j .LBB103_9
12795 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
12796 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12797 ret <8 x double> %v
12798 }
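; Explicitly sign-extending the i32 indices to i64 is expected to lower the
; same way as the plain <8 x i32> case above, as the checks that follow show.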
12800 define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
12801 ; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
12803 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12804 ; RV32V-NEXT: vsll.vi v8, v8, 3
12805 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
12806 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
12807 ; RV32V-NEXT: vmv.v.v v8, v12
12810 ; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
12812 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
12813 ; RV64V-NEXT: vsext.vf2 v16, v8
12814 ; RV64V-NEXT: vsll.vi v8, v16, 3
12815 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
12816 ; RV64V-NEXT: vmv.v.v v8, v12
12819 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
12820 ; RV32ZVE32F: # %bb.0:
12821 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
12822 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
12823 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
12824 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
12825 ; RV32ZVE32F-NEXT: andi a3, a2, 1
12826 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
12827 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
12828 ; RV32ZVE32F-NEXT: bnez a3, .LBB104_10
12829 ; RV32ZVE32F-NEXT: # %bb.1: # %else
12830 ; RV32ZVE32F-NEXT: andi a1, a2, 2
12831 ; RV32ZVE32F-NEXT: bnez a1, .LBB104_11
12832 ; RV32ZVE32F-NEXT: .LBB104_2: # %else2
12833 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12834 ; RV32ZVE32F-NEXT: bnez a1, .LBB104_12
12835 ; RV32ZVE32F-NEXT: .LBB104_3: # %else5
12836 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12837 ; RV32ZVE32F-NEXT: bnez a1, .LBB104_13
12838 ; RV32ZVE32F-NEXT: .LBB104_4: # %else8
12839 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12840 ; RV32ZVE32F-NEXT: bnez a1, .LBB104_14
12841 ; RV32ZVE32F-NEXT: .LBB104_5: # %else11
12842 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12843 ; RV32ZVE32F-NEXT: bnez a1, .LBB104_15
12844 ; RV32ZVE32F-NEXT: .LBB104_6: # %else14
12845 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12846 ; RV32ZVE32F-NEXT: bnez a1, .LBB104_16
12847 ; RV32ZVE32F-NEXT: .LBB104_7: # %else17
12848 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12849 ; RV32ZVE32F-NEXT: beqz a1, .LBB104_9
12850 ; RV32ZVE32F-NEXT: .LBB104_8: # %cond.load19
12851 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12852 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
12853 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
12854 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
12855 ; RV32ZVE32F-NEXT: .LBB104_9: # %else20
12856 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
12857 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
12858 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
12859 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
12860 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
12861 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
12862 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
12863 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
12864 ; RV32ZVE32F-NEXT: ret
12865 ; RV32ZVE32F-NEXT: .LBB104_10: # %cond.load
12866 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
12867 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
12868 ; RV32ZVE32F-NEXT: andi a1, a2, 2
12869 ; RV32ZVE32F-NEXT: beqz a1, .LBB104_2
12870 ; RV32ZVE32F-NEXT: .LBB104_11: # %cond.load1
12871 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12872 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12873 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12874 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
12875 ; RV32ZVE32F-NEXT: andi a1, a2, 4
12876 ; RV32ZVE32F-NEXT: beqz a1, .LBB104_3
12877 ; RV32ZVE32F-NEXT: .LBB104_12: # %cond.load4
12878 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12879 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
12880 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12881 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
12882 ; RV32ZVE32F-NEXT: andi a1, a2, 8
12883 ; RV32ZVE32F-NEXT: beqz a1, .LBB104_4
12884 ; RV32ZVE32F-NEXT: .LBB104_13: # %cond.load7
12885 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12886 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
12887 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12888 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
12889 ; RV32ZVE32F-NEXT: andi a1, a2, 16
12890 ; RV32ZVE32F-NEXT: beqz a1, .LBB104_5
12891 ; RV32ZVE32F-NEXT: .LBB104_14: # %cond.load10
12892 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12893 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12894 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12895 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
12896 ; RV32ZVE32F-NEXT: andi a1, a2, 32
12897 ; RV32ZVE32F-NEXT: beqz a1, .LBB104_6
12898 ; RV32ZVE32F-NEXT: .LBB104_15: # %cond.load13
12899 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12900 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
12901 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12902 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
12903 ; RV32ZVE32F-NEXT: andi a1, a2, 64
12904 ; RV32ZVE32F-NEXT: beqz a1, .LBB104_7
12905 ; RV32ZVE32F-NEXT: .LBB104_16: # %cond.load16
12906 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12907 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
12908 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
12909 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
12910 ; RV32ZVE32F-NEXT: andi a1, a2, -128
12911 ; RV32ZVE32F-NEXT: bnez a1, .LBB104_8
12912 ; RV32ZVE32F-NEXT: j .LBB104_9
12914 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
12915 ; RV64ZVE32F: # %bb.0:
12916 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
12917 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
12918 ; RV64ZVE32F-NEXT: andi a3, a2, 1
12919 ; RV64ZVE32F-NEXT: beqz a3, .LBB104_2
12920 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
12921 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
12922 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12923 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12924 ; RV64ZVE32F-NEXT: add a3, a1, a3
12925 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
12926 ; RV64ZVE32F-NEXT: .LBB104_2: # %else
12927 ; RV64ZVE32F-NEXT: andi a3, a2, 2
12928 ; RV64ZVE32F-NEXT: beqz a3, .LBB104_4
12929 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
12930 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12931 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12932 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
12933 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12934 ; RV64ZVE32F-NEXT: add a3, a1, a3
12935 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
12936 ; RV64ZVE32F-NEXT: .LBB104_4: # %else2
12937 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
12938 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12939 ; RV64ZVE32F-NEXT: andi a3, a2, 4
12940 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
12941 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
12942 ; RV64ZVE32F-NEXT: bnez a3, .LBB104_14
12943 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
12944 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12945 ; RV64ZVE32F-NEXT: bnez a3, .LBB104_15
12946 ; RV64ZVE32F-NEXT: .LBB104_6: # %else8
12947 ; RV64ZVE32F-NEXT: andi a3, a2, 16
12948 ; RV64ZVE32F-NEXT: bnez a3, .LBB104_16
12949 ; RV64ZVE32F-NEXT: .LBB104_7: # %else11
12950 ; RV64ZVE32F-NEXT: andi a3, a2, 32
12951 ; RV64ZVE32F-NEXT: beqz a3, .LBB104_9
12952 ; RV64ZVE32F-NEXT: .LBB104_8: # %cond.load13
12953 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
12954 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12955 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12956 ; RV64ZVE32F-NEXT: add a3, a1, a3
12957 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
12958 ; RV64ZVE32F-NEXT: .LBB104_9: # %else14
12959 ; RV64ZVE32F-NEXT: andi a3, a2, 64
12960 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
12961 ; RV64ZVE32F-NEXT: beqz a3, .LBB104_11
12962 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
12963 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12964 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12965 ; RV64ZVE32F-NEXT: add a3, a1, a3
12966 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
12967 ; RV64ZVE32F-NEXT: .LBB104_11: # %else17
12968 ; RV64ZVE32F-NEXT: andi a2, a2, -128
12969 ; RV64ZVE32F-NEXT: beqz a2, .LBB104_13
12970 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
12971 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12972 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12973 ; RV64ZVE32F-NEXT: slli a2, a2, 3
12974 ; RV64ZVE32F-NEXT: add a1, a1, a2
12975 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
12976 ; RV64ZVE32F-NEXT: .LBB104_13: # %else20
12977 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
12978 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
12979 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
12980 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
12981 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
12982 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
12983 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
12984 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
12985 ; RV64ZVE32F-NEXT: ret
12986 ; RV64ZVE32F-NEXT: .LBB104_14: # %cond.load4
12987 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12988 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12989 ; RV64ZVE32F-NEXT: add a3, a1, a3
12990 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
12991 ; RV64ZVE32F-NEXT: andi a3, a2, 8
12992 ; RV64ZVE32F-NEXT: beqz a3, .LBB104_6
12993 ; RV64ZVE32F-NEXT: .LBB104_15: # %cond.load7
12994 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12995 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
12996 ; RV64ZVE32F-NEXT: slli a3, a3, 3
12997 ; RV64ZVE32F-NEXT: add a3, a1, a3
12998 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
12999 ; RV64ZVE32F-NEXT: andi a3, a2, 16
13000 ; RV64ZVE32F-NEXT: beqz a3, .LBB104_7
13001 ; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load10
13002 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
13003 ; RV64ZVE32F-NEXT: slli a3, a3, 3
13004 ; RV64ZVE32F-NEXT: add a3, a1, a3
13005 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
13006 ; RV64ZVE32F-NEXT: andi a3, a2, 32
13007 ; RV64ZVE32F-NEXT: bnez a3, .LBB104_8
13008 ; RV64ZVE32F-NEXT: j .LBB104_9
13009 %eidxs = sext <8 x i32> %idxs to <8 x i64>
13010 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
13011 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
13012 ret <8 x double> %v
13013 }
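; For zero-extended i32 indices, RV64V switches to vzext.vf2, and the
; RV64ZVE32F scalar path scales each index with slli 32 / srli 29, i.e.
; (zext i32 -> i64) * 8.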
13015 define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
13016 ; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
13018 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
13019 ; RV32V-NEXT: vsll.vi v8, v8, 3
13020 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
13021 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
13022 ; RV32V-NEXT: vmv.v.v v8, v12
13025 ; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
13027 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
13028 ; RV64V-NEXT: vzext.vf2 v16, v8
13029 ; RV64V-NEXT: vsll.vi v8, v16, 3
13030 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
13031 ; RV64V-NEXT: vmv.v.v v8, v12
13034 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
13035 ; RV32ZVE32F: # %bb.0:
13036 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
13037 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
13038 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
13039 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
13040 ; RV32ZVE32F-NEXT: andi a3, a2, 1
13041 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
13042 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
13043 ; RV32ZVE32F-NEXT: bnez a3, .LBB105_10
13044 ; RV32ZVE32F-NEXT: # %bb.1: # %else
13045 ; RV32ZVE32F-NEXT: andi a1, a2, 2
13046 ; RV32ZVE32F-NEXT: bnez a1, .LBB105_11
13047 ; RV32ZVE32F-NEXT: .LBB105_2: # %else2
13048 ; RV32ZVE32F-NEXT: andi a1, a2, 4
13049 ; RV32ZVE32F-NEXT: bnez a1, .LBB105_12
13050 ; RV32ZVE32F-NEXT: .LBB105_3: # %else5
13051 ; RV32ZVE32F-NEXT: andi a1, a2, 8
13052 ; RV32ZVE32F-NEXT: bnez a1, .LBB105_13
13053 ; RV32ZVE32F-NEXT: .LBB105_4: # %else8
13054 ; RV32ZVE32F-NEXT: andi a1, a2, 16
13055 ; RV32ZVE32F-NEXT: bnez a1, .LBB105_14
13056 ; RV32ZVE32F-NEXT: .LBB105_5: # %else11
13057 ; RV32ZVE32F-NEXT: andi a1, a2, 32
13058 ; RV32ZVE32F-NEXT: bnez a1, .LBB105_15
13059 ; RV32ZVE32F-NEXT: .LBB105_6: # %else14
13060 ; RV32ZVE32F-NEXT: andi a1, a2, 64
13061 ; RV32ZVE32F-NEXT: bnez a1, .LBB105_16
13062 ; RV32ZVE32F-NEXT: .LBB105_7: # %else17
13063 ; RV32ZVE32F-NEXT: andi a1, a2, -128
13064 ; RV32ZVE32F-NEXT: beqz a1, .LBB105_9
13065 ; RV32ZVE32F-NEXT: .LBB105_8: # %cond.load19
13066 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13067 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
13068 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
13069 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
13070 ; RV32ZVE32F-NEXT: .LBB105_9: # %else20
13071 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
13072 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
13073 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
13074 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
13075 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
13076 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
13077 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
13078 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
13079 ; RV32ZVE32F-NEXT: ret
13080 ; RV32ZVE32F-NEXT: .LBB105_10: # %cond.load
13081 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
13082 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
13083 ; RV32ZVE32F-NEXT: andi a1, a2, 2
13084 ; RV32ZVE32F-NEXT: beqz a1, .LBB105_2
13085 ; RV32ZVE32F-NEXT: .LBB105_11: # %cond.load1
13086 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13087 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
13088 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13089 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
13090 ; RV32ZVE32F-NEXT: andi a1, a2, 4
13091 ; RV32ZVE32F-NEXT: beqz a1, .LBB105_3
13092 ; RV32ZVE32F-NEXT: .LBB105_12: # %cond.load4
13093 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13094 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
13095 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13096 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
13097 ; RV32ZVE32F-NEXT: andi a1, a2, 8
13098 ; RV32ZVE32F-NEXT: beqz a1, .LBB105_4
13099 ; RV32ZVE32F-NEXT: .LBB105_13: # %cond.load7
13100 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13101 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
13102 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13103 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
13104 ; RV32ZVE32F-NEXT: andi a1, a2, 16
13105 ; RV32ZVE32F-NEXT: beqz a1, .LBB105_5
13106 ; RV32ZVE32F-NEXT: .LBB105_14: # %cond.load10
13107 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13108 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
13109 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13110 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
13111 ; RV32ZVE32F-NEXT: andi a1, a2, 32
13112 ; RV32ZVE32F-NEXT: beqz a1, .LBB105_6
13113 ; RV32ZVE32F-NEXT: .LBB105_15: # %cond.load13
13114 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13115 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
13116 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13117 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
13118 ; RV32ZVE32F-NEXT: andi a1, a2, 64
13119 ; RV32ZVE32F-NEXT: beqz a1, .LBB105_7
13120 ; RV32ZVE32F-NEXT: .LBB105_16: # %cond.load16
13121 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13122 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
13123 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13124 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
13125 ; RV32ZVE32F-NEXT: andi a1, a2, -128
13126 ; RV32ZVE32F-NEXT: bnez a1, .LBB105_8
13127 ; RV32ZVE32F-NEXT: j .LBB105_9
13129 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
13130 ; RV64ZVE32F: # %bb.0:
13131 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
13132 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
13133 ; RV64ZVE32F-NEXT: andi a3, a2, 1
13134 ; RV64ZVE32F-NEXT: beqz a3, .LBB105_2
13135 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
13136 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
13137 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
13138 ; RV64ZVE32F-NEXT: slli a3, a3, 32
13139 ; RV64ZVE32F-NEXT: srli a3, a3, 29
13140 ; RV64ZVE32F-NEXT: add a3, a1, a3
13141 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
13142 ; RV64ZVE32F-NEXT: .LBB105_2: # %else
13143 ; RV64ZVE32F-NEXT: andi a3, a2, 2
13144 ; RV64ZVE32F-NEXT: beqz a3, .LBB105_4
13145 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
13146 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13147 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
13148 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
13149 ; RV64ZVE32F-NEXT: slli a3, a3, 32
13150 ; RV64ZVE32F-NEXT: srli a3, a3, 29
13151 ; RV64ZVE32F-NEXT: add a3, a1, a3
13152 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
13153 ; RV64ZVE32F-NEXT: .LBB105_4: # %else2
13154 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
13155 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
13156 ; RV64ZVE32F-NEXT: andi a3, a2, 4
13157 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
13158 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
13159 ; RV64ZVE32F-NEXT: bnez a3, .LBB105_14
13160 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
13161 ; RV64ZVE32F-NEXT: andi a3, a2, 8
13162 ; RV64ZVE32F-NEXT: bnez a3, .LBB105_15
13163 ; RV64ZVE32F-NEXT: .LBB105_6: # %else8
13164 ; RV64ZVE32F-NEXT: andi a3, a2, 16
13165 ; RV64ZVE32F-NEXT: bnez a3, .LBB105_16
13166 ; RV64ZVE32F-NEXT: .LBB105_7: # %else11
13167 ; RV64ZVE32F-NEXT: andi a3, a2, 32
13168 ; RV64ZVE32F-NEXT: beqz a3, .LBB105_9
13169 ; RV64ZVE32F-NEXT: .LBB105_8: # %cond.load13
13170 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
13171 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
13172 ; RV64ZVE32F-NEXT: slli a3, a3, 32
13173 ; RV64ZVE32F-NEXT: srli a3, a3, 29
13174 ; RV64ZVE32F-NEXT: add a3, a1, a3
13175 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
13176 ; RV64ZVE32F-NEXT: .LBB105_9: # %else14
13177 ; RV64ZVE32F-NEXT: andi a3, a2, 64
13178 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
13179 ; RV64ZVE32F-NEXT: beqz a3, .LBB105_11
13180 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
13181 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
13182 ; RV64ZVE32F-NEXT: slli a3, a3, 32
13183 ; RV64ZVE32F-NEXT: srli a3, a3, 29
13184 ; RV64ZVE32F-NEXT: add a3, a1, a3
13185 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
13186 ; RV64ZVE32F-NEXT: .LBB105_11: # %else17
13187 ; RV64ZVE32F-NEXT: andi a2, a2, -128
13188 ; RV64ZVE32F-NEXT: beqz a2, .LBB105_13
13189 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
13190 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
13191 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13192 ; RV64ZVE32F-NEXT: slli a2, a2, 32
13193 ; RV64ZVE32F-NEXT: srli a2, a2, 29
13194 ; RV64ZVE32F-NEXT: add a1, a1, a2
13195 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
13196 ; RV64ZVE32F-NEXT: .LBB105_13: # %else20
13197 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
13198 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
13199 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
13200 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
13201 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
13202 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
13203 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
13204 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
13205 ; RV64ZVE32F-NEXT: ret
13206 ; RV64ZVE32F-NEXT: .LBB105_14: # %cond.load4
13207 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
13208 ; RV64ZVE32F-NEXT: slli a3, a3, 32
13209 ; RV64ZVE32F-NEXT: srli a3, a3, 29
13210 ; RV64ZVE32F-NEXT: add a3, a1, a3
13211 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
13212 ; RV64ZVE32F-NEXT: andi a3, a2, 8
13213 ; RV64ZVE32F-NEXT: beqz a3, .LBB105_6
13214 ; RV64ZVE32F-NEXT: .LBB105_15: # %cond.load7
13215 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
13216 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
13217 ; RV64ZVE32F-NEXT: slli a3, a3, 32
13218 ; RV64ZVE32F-NEXT: srli a3, a3, 29
13219 ; RV64ZVE32F-NEXT: add a3, a1, a3
13220 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
13221 ; RV64ZVE32F-NEXT: andi a3, a2, 16
13222 ; RV64ZVE32F-NEXT: beqz a3, .LBB105_7
13223 ; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load10
13224 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
13225 ; RV64ZVE32F-NEXT: slli a3, a3, 32
13226 ; RV64ZVE32F-NEXT: srli a3, a3, 29
13227 ; RV64ZVE32F-NEXT: add a3, a1, a3
13228 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
13229 ; RV64ZVE32F-NEXT: andi a3, a2, 32
13230 ; RV64ZVE32F-NEXT: bnez a3, .LBB105_8
13231 ; RV64ZVE32F-NEXT: j .LBB105_9
13232 %eidxs = zext <8 x i32> %idxs to <8 x i64>
13233 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
13234 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
13235 ret <8 x double> %v
13236 }
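; The next test uses native i64 indices. RV32V narrows them with vnsrl.wi
; before the indexed load. The ZVE32F configurations receive the <8 x i64>
; index vector indirectly through a2: RV32ZVE32F only loads the low word of
; each index (pointers are 32 bits), while RV64ZVE32F loads each index with ld
; and scales it by 8 in scalar code.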
13238 define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) {
13239 ; RV32V-LABEL: mgather_baseidx_v8f64:
13241 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
13242 ; RV32V-NEXT: vnsrl.wi v16, v8, 0
13243 ; RV32V-NEXT: vsll.vi v8, v16, 3
13244 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
13245 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
13246 ; RV32V-NEXT: vmv.v.v v8, v12
13249 ; RV64V-LABEL: mgather_baseidx_v8f64:
13251 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
13252 ; RV64V-NEXT: vsll.vi v8, v8, 3
13253 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
13254 ; RV64V-NEXT: vmv.v.v v8, v12
13257 ; RV32ZVE32F-LABEL: mgather_baseidx_v8f64:
13258 ; RV32ZVE32F: # %bb.0:
13259 ; RV32ZVE32F-NEXT: lw a3, 32(a2)
13260 ; RV32ZVE32F-NEXT: lw a4, 40(a2)
13261 ; RV32ZVE32F-NEXT: lw a5, 48(a2)
13262 ; RV32ZVE32F-NEXT: lw a6, 56(a2)
13263 ; RV32ZVE32F-NEXT: lw a7, 0(a2)
13264 ; RV32ZVE32F-NEXT: lw t0, 8(a2)
13265 ; RV32ZVE32F-NEXT: lw t1, 16(a2)
13266 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
13267 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
13268 ; RV32ZVE32F-NEXT: vmv.v.x v8, a7
13269 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
13270 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
13271 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
13272 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
13273 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
13274 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2
13275 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
13276 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
13277 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
13278 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
13279 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
13280 ; RV32ZVE32F-NEXT: andi a3, a2, 1
13281 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
13282 ; RV32ZVE32F-NEXT: bnez a3, .LBB106_10
13283 ; RV32ZVE32F-NEXT: # %bb.1: # %else
13284 ; RV32ZVE32F-NEXT: andi a1, a2, 2
13285 ; RV32ZVE32F-NEXT: bnez a1, .LBB106_11
13286 ; RV32ZVE32F-NEXT: .LBB106_2: # %else2
13287 ; RV32ZVE32F-NEXT: andi a1, a2, 4
13288 ; RV32ZVE32F-NEXT: bnez a1, .LBB106_12
13289 ; RV32ZVE32F-NEXT: .LBB106_3: # %else5
13290 ; RV32ZVE32F-NEXT: andi a1, a2, 8
13291 ; RV32ZVE32F-NEXT: bnez a1, .LBB106_13
13292 ; RV32ZVE32F-NEXT: .LBB106_4: # %else8
13293 ; RV32ZVE32F-NEXT: andi a1, a2, 16
13294 ; RV32ZVE32F-NEXT: bnez a1, .LBB106_14
13295 ; RV32ZVE32F-NEXT: .LBB106_5: # %else11
13296 ; RV32ZVE32F-NEXT: andi a1, a2, 32
13297 ; RV32ZVE32F-NEXT: bnez a1, .LBB106_15
13298 ; RV32ZVE32F-NEXT: .LBB106_6: # %else14
13299 ; RV32ZVE32F-NEXT: andi a1, a2, 64
13300 ; RV32ZVE32F-NEXT: bnez a1, .LBB106_16
13301 ; RV32ZVE32F-NEXT: .LBB106_7: # %else17
13302 ; RV32ZVE32F-NEXT: andi a1, a2, -128
13303 ; RV32ZVE32F-NEXT: beqz a1, .LBB106_9
13304 ; RV32ZVE32F-NEXT: .LBB106_8: # %cond.load19
13305 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13306 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
13307 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
13308 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
13309 ; RV32ZVE32F-NEXT: .LBB106_9: # %else20
13310 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
13311 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
13312 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
13313 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
13314 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
13315 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
13316 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
13317 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
13318 ; RV32ZVE32F-NEXT: ret
13319 ; RV32ZVE32F-NEXT: .LBB106_10: # %cond.load
13320 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
13321 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
13322 ; RV32ZVE32F-NEXT: andi a1, a2, 2
13323 ; RV32ZVE32F-NEXT: beqz a1, .LBB106_2
13324 ; RV32ZVE32F-NEXT: .LBB106_11: # %cond.load1
13325 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13326 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
13327 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13328 ; RV32ZVE32F-NEXT: fld fa1, 0(a1)
13329 ; RV32ZVE32F-NEXT: andi a1, a2, 4
13330 ; RV32ZVE32F-NEXT: beqz a1, .LBB106_3
13331 ; RV32ZVE32F-NEXT: .LBB106_12: # %cond.load4
13332 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13333 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
13334 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13335 ; RV32ZVE32F-NEXT: fld fa2, 0(a1)
13336 ; RV32ZVE32F-NEXT: andi a1, a2, 8
13337 ; RV32ZVE32F-NEXT: beqz a1, .LBB106_4
13338 ; RV32ZVE32F-NEXT: .LBB106_13: # %cond.load7
13339 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13340 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
13341 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13342 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
13343 ; RV32ZVE32F-NEXT: andi a1, a2, 16
13344 ; RV32ZVE32F-NEXT: beqz a1, .LBB106_5
13345 ; RV32ZVE32F-NEXT: .LBB106_14: # %cond.load10
13346 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13347 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
13348 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13349 ; RV32ZVE32F-NEXT: fld fa4, 0(a1)
13350 ; RV32ZVE32F-NEXT: andi a1, a2, 32
13351 ; RV32ZVE32F-NEXT: beqz a1, .LBB106_6
13352 ; RV32ZVE32F-NEXT: .LBB106_15: # %cond.load13
13353 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13354 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
13355 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13356 ; RV32ZVE32F-NEXT: fld fa5, 0(a1)
13357 ; RV32ZVE32F-NEXT: andi a1, a2, 64
13358 ; RV32ZVE32F-NEXT: beqz a1, .LBB106_7
13359 ; RV32ZVE32F-NEXT: .LBB106_16: # %cond.load16
13360 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13361 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
13362 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13363 ; RV32ZVE32F-NEXT: fld fa6, 0(a1)
13364 ; RV32ZVE32F-NEXT: andi a1, a2, -128
13365 ; RV32ZVE32F-NEXT: bnez a1, .LBB106_8
13366 ; RV32ZVE32F-NEXT: j .LBB106_9
13368 ; RV64ZVE32F-LABEL: mgather_baseidx_v8f64:
13369 ; RV64ZVE32F: # %bb.0:
13370 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
13371 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
13372 ; RV64ZVE32F-NEXT: andi a4, a3, 1
13373 ; RV64ZVE32F-NEXT: bnez a4, .LBB106_10
13374 ; RV64ZVE32F-NEXT: # %bb.1: # %else
13375 ; RV64ZVE32F-NEXT: andi a4, a3, 2
13376 ; RV64ZVE32F-NEXT: bnez a4, .LBB106_11
13377 ; RV64ZVE32F-NEXT: .LBB106_2: # %else2
13378 ; RV64ZVE32F-NEXT: andi a4, a3, 4
13379 ; RV64ZVE32F-NEXT: bnez a4, .LBB106_12
13380 ; RV64ZVE32F-NEXT: .LBB106_3: # %else5
13381 ; RV64ZVE32F-NEXT: andi a4, a3, 8
13382 ; RV64ZVE32F-NEXT: bnez a4, .LBB106_13
13383 ; RV64ZVE32F-NEXT: .LBB106_4: # %else8
13384 ; RV64ZVE32F-NEXT: andi a4, a3, 16
13385 ; RV64ZVE32F-NEXT: bnez a4, .LBB106_14
13386 ; RV64ZVE32F-NEXT: .LBB106_5: # %else11
13387 ; RV64ZVE32F-NEXT: andi a4, a3, 32
13388 ; RV64ZVE32F-NEXT: bnez a4, .LBB106_15
13389 ; RV64ZVE32F-NEXT: .LBB106_6: # %else14
13390 ; RV64ZVE32F-NEXT: andi a4, a3, 64
13391 ; RV64ZVE32F-NEXT: bnez a4, .LBB106_16
13392 ; RV64ZVE32F-NEXT: .LBB106_7: # %else17
13393 ; RV64ZVE32F-NEXT: andi a3, a3, -128
13394 ; RV64ZVE32F-NEXT: beqz a3, .LBB106_9
13395 ; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load19
13396 ; RV64ZVE32F-NEXT: ld a2, 56(a2)
13397 ; RV64ZVE32F-NEXT: slli a2, a2, 3
13398 ; RV64ZVE32F-NEXT: add a1, a1, a2
13399 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
13400 ; RV64ZVE32F-NEXT: .LBB106_9: # %else20
13401 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
13402 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
13403 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
13404 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
13405 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
13406 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
13407 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
13408 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
13409 ; RV64ZVE32F-NEXT: ret
13410 ; RV64ZVE32F-NEXT: .LBB106_10: # %cond.load
13411 ; RV64ZVE32F-NEXT: ld a4, 0(a2)
13412 ; RV64ZVE32F-NEXT: slli a4, a4, 3
13413 ; RV64ZVE32F-NEXT: add a4, a1, a4
13414 ; RV64ZVE32F-NEXT: fld fa0, 0(a4)
13415 ; RV64ZVE32F-NEXT: andi a4, a3, 2
13416 ; RV64ZVE32F-NEXT: beqz a4, .LBB106_2
13417 ; RV64ZVE32F-NEXT: .LBB106_11: # %cond.load1
13418 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
13419 ; RV64ZVE32F-NEXT: slli a4, a4, 3
13420 ; RV64ZVE32F-NEXT: add a4, a1, a4
13421 ; RV64ZVE32F-NEXT: fld fa1, 0(a4)
13422 ; RV64ZVE32F-NEXT: andi a4, a3, 4
13423 ; RV64ZVE32F-NEXT: beqz a4, .LBB106_3
13424 ; RV64ZVE32F-NEXT: .LBB106_12: # %cond.load4
13425 ; RV64ZVE32F-NEXT: ld a4, 16(a2)
13426 ; RV64ZVE32F-NEXT: slli a4, a4, 3
13427 ; RV64ZVE32F-NEXT: add a4, a1, a4
13428 ; RV64ZVE32F-NEXT: fld fa2, 0(a4)
13429 ; RV64ZVE32F-NEXT: andi a4, a3, 8
13430 ; RV64ZVE32F-NEXT: beqz a4, .LBB106_4
13431 ; RV64ZVE32F-NEXT: .LBB106_13: # %cond.load7
13432 ; RV64ZVE32F-NEXT: ld a4, 24(a2)
13433 ; RV64ZVE32F-NEXT: slli a4, a4, 3
13434 ; RV64ZVE32F-NEXT: add a4, a1, a4
13435 ; RV64ZVE32F-NEXT: fld fa3, 0(a4)
13436 ; RV64ZVE32F-NEXT: andi a4, a3, 16
13437 ; RV64ZVE32F-NEXT: beqz a4, .LBB106_5
13438 ; RV64ZVE32F-NEXT: .LBB106_14: # %cond.load10
13439 ; RV64ZVE32F-NEXT: ld a4, 32(a2)
13440 ; RV64ZVE32F-NEXT: slli a4, a4, 3
13441 ; RV64ZVE32F-NEXT: add a4, a1, a4
13442 ; RV64ZVE32F-NEXT: fld fa4, 0(a4)
13443 ; RV64ZVE32F-NEXT: andi a4, a3, 32
13444 ; RV64ZVE32F-NEXT: beqz a4, .LBB106_6
13445 ; RV64ZVE32F-NEXT: .LBB106_15: # %cond.load13
13446 ; RV64ZVE32F-NEXT: ld a4, 40(a2)
13447 ; RV64ZVE32F-NEXT: slli a4, a4, 3
13448 ; RV64ZVE32F-NEXT: add a4, a1, a4
13449 ; RV64ZVE32F-NEXT: fld fa5, 0(a4)
13450 ; RV64ZVE32F-NEXT: andi a4, a3, 64
13451 ; RV64ZVE32F-NEXT: beqz a4, .LBB106_7
13452 ; RV64ZVE32F-NEXT: .LBB106_16: # %cond.load16
13453 ; RV64ZVE32F-NEXT: ld a4, 48(a2)
13454 ; RV64ZVE32F-NEXT: slli a4, a4, 3
13455 ; RV64ZVE32F-NEXT: add a4, a1, a4
13456 ; RV64ZVE32F-NEXT: fld fa6, 0(a4)
13457 ; RV64ZVE32F-NEXT: andi a3, a3, -128
13458 ; RV64ZVE32F-NEXT: bnez a3, .LBB106_8
13459 ; RV64ZVE32F-NEXT: j .LBB106_9
13460 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
13461 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
13462 ret <8 x double> %v
13463 }
13465 declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
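; The <16 x i8> gather below exercises byte-sized elements: RV64ZVE32F loads
; each active byte with lbu and inserts it into the passthru result using
; vmv.s.x followed by vslideup.vi at the lane index.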
13467 define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m, <16 x i8> %passthru) {
13468 ; RV32-LABEL: mgather_baseidx_v16i8:
13470 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
13471 ; RV32-NEXT: vsext.vf4 v12, v8
13472 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
13473 ; RV32-NEXT: vluxei32.v v9, (a0), v12, v0.t
13474 ; RV32-NEXT: vmv.v.v v8, v9
13477 ; RV64V-LABEL: mgather_baseidx_v16i8:
13479 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
13480 ; RV64V-NEXT: vsext.vf8 v16, v8
13481 ; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
13482 ; RV64V-NEXT: vluxei64.v v9, (a0), v16, v0.t
13483 ; RV64V-NEXT: vmv.v.v v8, v9
13486 ; RV64ZVE32F-LABEL: mgather_baseidx_v16i8:
13487 ; RV64ZVE32F: # %bb.0:
13488 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
13489 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
13490 ; RV64ZVE32F-NEXT: andi a2, a1, 1
13491 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_2
13492 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
13493 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
13494 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13495 ; RV64ZVE32F-NEXT: add a2, a0, a2
13496 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13497 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13498 ; RV64ZVE32F-NEXT: .LBB107_2: # %else
13499 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13500 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_4
13501 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
13502 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13503 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
13504 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
13505 ; RV64ZVE32F-NEXT: add a2, a0, a2
13506 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13507 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
13508 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
13509 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
13510 ; RV64ZVE32F-NEXT: .LBB107_4: # %else2
13511 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
13512 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
13513 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13514 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13515 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
13516 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_25
13517 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
13518 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13519 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_26
13520 ; RV64ZVE32F-NEXT: .LBB107_6: # %else8
13521 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13522 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_8
13523 ; RV64ZVE32F-NEXT: .LBB107_7: # %cond.load10
13524 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
13525 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
13526 ; RV64ZVE32F-NEXT: add a2, a0, a2
13527 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13528 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
13529 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4
13530 ; RV64ZVE32F-NEXT: .LBB107_8: # %else11
13531 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13532 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
13533 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
13534 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_10
13535 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
13536 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13537 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
13538 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
13539 ; RV64ZVE32F-NEXT: add a2, a0, a2
13540 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13541 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
13542 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
13543 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5
13544 ; RV64ZVE32F-NEXT: .LBB107_10: # %else14
13545 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13546 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13547 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
13548 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_27
13549 ; RV64ZVE32F-NEXT: # %bb.11: # %else17
13550 ; RV64ZVE32F-NEXT: andi a2, a1, 128
13551 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_28
13552 ; RV64ZVE32F-NEXT: .LBB107_12: # %else20
13553 ; RV64ZVE32F-NEXT: andi a2, a1, 256
13554 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_29
13555 ; RV64ZVE32F-NEXT: .LBB107_13: # %else23
13556 ; RV64ZVE32F-NEXT: andi a2, a1, 512
13557 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_15
13558 ; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load25
13559 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13560 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
13561 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
13562 ; RV64ZVE32F-NEXT: add a2, a0, a2
13563 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13564 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
13565 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
13566 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9
13567 ; RV64ZVE32F-NEXT: .LBB107_15: # %else26
13568 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
13569 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
13570 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
13571 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13572 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
13573 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_30
13574 ; RV64ZVE32F-NEXT: # %bb.16: # %else29
13575 ; RV64ZVE32F-NEXT: slli a2, a1, 52
13576 ; RV64ZVE32F-NEXT: bltz a2, .LBB107_31
13577 ; RV64ZVE32F-NEXT: .LBB107_17: # %else32
13578 ; RV64ZVE32F-NEXT: slli a2, a1, 51
13579 ; RV64ZVE32F-NEXT: bltz a2, .LBB107_32
13580 ; RV64ZVE32F-NEXT: .LBB107_18: # %else35
13581 ; RV64ZVE32F-NEXT: slli a2, a1, 50
13582 ; RV64ZVE32F-NEXT: bgez a2, .LBB107_20
13583 ; RV64ZVE32F-NEXT: .LBB107_19: # %cond.load37
13584 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13585 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
13586 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13587 ; RV64ZVE32F-NEXT: add a2, a0, a2
13588 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13589 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
13590 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
13591 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13
13592 ; RV64ZVE32F-NEXT: .LBB107_20: # %else38
13593 ; RV64ZVE32F-NEXT: slli a2, a1, 49
13594 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13595 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
13596 ; RV64ZVE32F-NEXT: bgez a2, .LBB107_22
13597 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40
13598 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13599 ; RV64ZVE32F-NEXT: add a2, a0, a2
13600 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13601 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
13602 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
13603 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14
13604 ; RV64ZVE32F-NEXT: .LBB107_22: # %else41
13605 ; RV64ZVE32F-NEXT: lui a2, 1048568
13606 ; RV64ZVE32F-NEXT: and a1, a1, a2
13607 ; RV64ZVE32F-NEXT: beqz a1, .LBB107_24
13608 ; RV64ZVE32F-NEXT: # %bb.23: # %cond.load43
13609 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13610 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
13611 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
13612 ; RV64ZVE32F-NEXT: add a0, a0, a1
13613 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
13614 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
13615 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
13616 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
13617 ; RV64ZVE32F-NEXT: .LBB107_24: # %else44
13618 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
13619 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
13620 ; RV64ZVE32F-NEXT: ret
13621 ; RV64ZVE32F-NEXT: .LBB107_25: # %cond.load4
13622 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
13623 ; RV64ZVE32F-NEXT: add a2, a0, a2
13624 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13625 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13626 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
13627 ; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2
13628 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13629 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_6
13630 ; RV64ZVE32F-NEXT: .LBB107_26: # %cond.load7
13631 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13632 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
13633 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
13634 ; RV64ZVE32F-NEXT: add a2, a0, a2
13635 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13636 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
13637 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
13638 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
13639 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13640 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_7
13641 ; RV64ZVE32F-NEXT: j .LBB107_8
13642 ; RV64ZVE32F-NEXT: .LBB107_27: # %cond.load16
13643 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
13644 ; RV64ZVE32F-NEXT: add a2, a0, a2
13645 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13646 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
13647 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
13648 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
13649 ; RV64ZVE32F-NEXT: andi a2, a1, 128
13650 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_12
13651 ; RV64ZVE32F-NEXT: .LBB107_28: # %cond.load19
13652 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13653 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
13654 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
13655 ; RV64ZVE32F-NEXT: add a2, a0, a2
13656 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13657 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
13658 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
13659 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
13660 ; RV64ZVE32F-NEXT: andi a2, a1, 256
13661 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_13
13662 ; RV64ZVE32F-NEXT: .LBB107_29: # %cond.load22
13663 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
13664 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13665 ; RV64ZVE32F-NEXT: add a2, a0, a2
13666 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13667 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
13668 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
13669 ; RV64ZVE32F-NEXT: andi a2, a1, 512
13670 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_14
13671 ; RV64ZVE32F-NEXT: j .LBB107_15
13672 ; RV64ZVE32F-NEXT: .LBB107_30: # %cond.load28
13673 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13674 ; RV64ZVE32F-NEXT: add a2, a0, a2
13675 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13676 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
13677 ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
13678 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
13679 ; RV64ZVE32F-NEXT: slli a2, a1, 52
13680 ; RV64ZVE32F-NEXT: bgez a2, .LBB107_17
13681 ; RV64ZVE32F-NEXT: .LBB107_31: # %cond.load31
13682 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13683 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
13684 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13685 ; RV64ZVE32F-NEXT: add a2, a0, a2
13686 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13687 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
13688 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
13689 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11
13690 ; RV64ZVE32F-NEXT: slli a2, a1, 51
13691 ; RV64ZVE32F-NEXT: bgez a2, .LBB107_18
13692 ; RV64ZVE32F-NEXT: .LBB107_32: # %cond.load34
13693 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
13694 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
13695 ; RV64ZVE32F-NEXT: add a2, a0, a2
13696 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13697 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
13698 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12
13699 ; RV64ZVE32F-NEXT: slli a2, a1, 50
13700 ; RV64ZVE32F-NEXT: bltz a2, .LBB107_19
13701 ; RV64ZVE32F-NEXT: j .LBB107_20
13702 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
13703 %v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
13704 ret <16 x i8> %v
13705 }
13707 declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
13709 define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, <32 x i8> %passthru) {
13710 ; RV32-LABEL: mgather_baseidx_v32i8:
13712 ; RV32-NEXT: li a1, 32
13713 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
13714 ; RV32-NEXT: vsext.vf4 v16, v8
13715 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu
13716 ; RV32-NEXT: vluxei32.v v10, (a0), v16, v0.t
13717 ; RV32-NEXT: vmv.v.v v8, v10
13720 ; RV64V-LABEL: mgather_baseidx_v32i8:
13722 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
13723 ; RV64V-NEXT: vsext.vf8 v16, v8
13724 ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma
13725 ; RV64V-NEXT: vslidedown.vi v12, v10, 16
13726 ; RV64V-NEXT: vslidedown.vi v14, v8, 16
13727 ; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13728 ; RV64V-NEXT: vslidedown.vi v8, v0, 2
13729 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu
13730 ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t
13731 ; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
13732 ; RV64V-NEXT: vsext.vf8 v16, v14
13733 ; RV64V-NEXT: vmv1r.v v0, v8
13734 ; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
13735 ; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t
13736 ; RV64V-NEXT: li a0, 32
13737 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
13738 ; RV64V-NEXT: vslideup.vi v10, v12, 16
13739 ; RV64V-NEXT: vmv.v.v v8, v10
13742 ; RV64ZVE32F-LABEL: mgather_baseidx_v32i8:
13743 ; RV64ZVE32F: # %bb.0:
13744 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13745 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
13746 ; RV64ZVE32F-NEXT: andi a2, a1, 1
13747 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_2
13748 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
13749 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma
13750 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13751 ; RV64ZVE32F-NEXT: add a2, a0, a2
13752 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13753 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
13754 ; RV64ZVE32F-NEXT: .LBB108_2: # %else
13755 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13756 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_4
13757 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
13758 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
13759 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
13760 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
13761 ; RV64ZVE32F-NEXT: add a2, a0, a2
13762 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13763 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13764 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
13765 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
13766 ; RV64ZVE32F-NEXT: .LBB108_4: # %else2
13767 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
13768 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4
13769 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13770 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13771 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
13772 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_49
13773 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
13774 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13775 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_50
13776 ; RV64ZVE32F-NEXT: .LBB108_6: # %else8
13777 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13778 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_8
13779 ; RV64ZVE32F-NEXT: .LBB108_7: # %cond.load10
13780 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
13781 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
13782 ; RV64ZVE32F-NEXT: add a2, a0, a2
13783 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13784 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13785 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
13786 ; RV64ZVE32F-NEXT: .LBB108_8: # %else11
13787 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13788 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
13789 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8
13790 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_10
13791 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
13792 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13793 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
13794 ; RV64ZVE32F-NEXT: vmv.x.s a2, v14
13795 ; RV64ZVE32F-NEXT: add a2, a0, a2
13796 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13797 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
13798 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
13799 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5
13800 ; RV64ZVE32F-NEXT: .LBB108_10: # %else14
13801 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13802 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13803 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
13804 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_51
13805 ; RV64ZVE32F-NEXT: # %bb.11: # %else17
13806 ; RV64ZVE32F-NEXT: andi a2, a1, 128
13807 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_52
13808 ; RV64ZVE32F-NEXT: .LBB108_12: # %else20
13809 ; RV64ZVE32F-NEXT: andi a2, a1, 256
13810 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_53
13811 ; RV64ZVE32F-NEXT: .LBB108_13: # %else23
13812 ; RV64ZVE32F-NEXT: andi a2, a1, 512
13813 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_15
13814 ; RV64ZVE32F-NEXT: .LBB108_14: # %cond.load25
13815 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13816 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
13817 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
13818 ; RV64ZVE32F-NEXT: add a2, a0, a2
13819 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13820 ; RV64ZVE32F-NEXT: vmv.s.x v13, a2
13821 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
13822 ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 9
13823 ; RV64ZVE32F-NEXT: .LBB108_15: # %else26
13824 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
13825 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
13826 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
13827 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13828 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
13829 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_17
13830 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28
13831 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
13832 ; RV64ZVE32F-NEXT: add a2, a0, a2
13833 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13834 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
13835 ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
13836 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10
13837 ; RV64ZVE32F-NEXT: .LBB108_17: # %else29
13838 ; RV64ZVE32F-NEXT: slli a2, a1, 52
13839 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_19
13840 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
13841 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13842 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
13843 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
13844 ; RV64ZVE32F-NEXT: add a2, a0, a2
13845 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13846 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13847 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
13848 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11
13849 ; RV64ZVE32F-NEXT: .LBB108_19: # %else32
13850 ; RV64ZVE32F-NEXT: slli a2, a1, 51
13851 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
13852 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
13853 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_21
13854 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
13855 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
13856 ; RV64ZVE32F-NEXT: add a2, a0, a2
13857 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13858 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13859 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
13860 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 12
13861 ; RV64ZVE32F-NEXT: .LBB108_21: # %else35
13862 ; RV64ZVE32F-NEXT: slli a2, a1, 50
13863 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_23
13864 ; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
13865 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13866 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 1
13867 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
13868 ; RV64ZVE32F-NEXT: add a2, a0, a2
13869 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13870 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13871 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
13872 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13
13873 ; RV64ZVE32F-NEXT: .LBB108_23: # %else38
13874 ; RV64ZVE32F-NEXT: slli a2, a1, 49
13875 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13876 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2
13877 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_54
13878 ; RV64ZVE32F-NEXT: # %bb.24: # %else41
13879 ; RV64ZVE32F-NEXT: slli a2, a1, 48
13880 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_55
13881 ; RV64ZVE32F-NEXT: .LBB108_25: # %else44
13882 ; RV64ZVE32F-NEXT: slli a2, a1, 47
13883 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_56
13884 ; RV64ZVE32F-NEXT: .LBB108_26: # %else47
13885 ; RV64ZVE32F-NEXT: slli a2, a1, 46
13886 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_28
13887 ; RV64ZVE32F-NEXT: .LBB108_27: # %cond.load49
13888 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13889 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
13890 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
13891 ; RV64ZVE32F-NEXT: add a2, a0, a2
13892 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13893 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13894 ; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma
13895 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17
13896 ; RV64ZVE32F-NEXT: .LBB108_28: # %else50
13897 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
13898 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
13899 ; RV64ZVE32F-NEXT: slli a2, a1, 45
13900 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13901 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
13902 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_57
13903 ; RV64ZVE32F-NEXT: # %bb.29: # %else53
13904 ; RV64ZVE32F-NEXT: slli a2, a1, 44
13905 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_58
13906 ; RV64ZVE32F-NEXT: .LBB108_30: # %else56
13907 ; RV64ZVE32F-NEXT: slli a2, a1, 43
13908 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_32
13909 ; RV64ZVE32F-NEXT: .LBB108_31: # %cond.load58
13910 ; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma
13911 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
13912 ; RV64ZVE32F-NEXT: add a2, a0, a2
13913 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13914 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13915 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20
13916 ; RV64ZVE32F-NEXT: .LBB108_32: # %else59
13917 ; RV64ZVE32F-NEXT: slli a2, a1, 42
13918 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
13919 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
13920 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_34
13921 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61
13922 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13923 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1
13924 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
13925 ; RV64ZVE32F-NEXT: add a2, a0, a2
13926 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13927 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13928 ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma
13929 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21
13930 ; RV64ZVE32F-NEXT: .LBB108_34: # %else62
13931 ; RV64ZVE32F-NEXT: slli a2, a1, 41
13932 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13933 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
13934 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_59
13935 ; RV64ZVE32F-NEXT: # %bb.35: # %else65
13936 ; RV64ZVE32F-NEXT: slli a2, a1, 40
13937 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_60
13938 ; RV64ZVE32F-NEXT: .LBB108_36: # %else68
13939 ; RV64ZVE32F-NEXT: slli a2, a1, 39
13940 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_61
13941 ; RV64ZVE32F-NEXT: .LBB108_37: # %else71
13942 ; RV64ZVE32F-NEXT: slli a2, a1, 38
13943 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_39
13944 ; RV64ZVE32F-NEXT: .LBB108_38: # %cond.load73
13945 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13946 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
13947 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
13948 ; RV64ZVE32F-NEXT: add a2, a0, a2
13949 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13950 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13951 ; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma
13952 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25
13953 ; RV64ZVE32F-NEXT: .LBB108_39: # %else74
13954 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
13955 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
13956 ; RV64ZVE32F-NEXT: slli a2, a1, 37
13957 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13958 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
13959 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_62
13960 ; RV64ZVE32F-NEXT: # %bb.40: # %else77
13961 ; RV64ZVE32F-NEXT: slli a2, a1, 36
13962 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_63
13963 ; RV64ZVE32F-NEXT: .LBB108_41: # %else80
13964 ; RV64ZVE32F-NEXT: slli a2, a1, 35
13965 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_64
13966 ; RV64ZVE32F-NEXT: .LBB108_42: # %else83
13967 ; RV64ZVE32F-NEXT: slli a2, a1, 34
13968 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_44
13969 ; RV64ZVE32F-NEXT: .LBB108_43: # %cond.load85
13970 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13971 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
13972 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13973 ; RV64ZVE32F-NEXT: add a2, a0, a2
13974 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13975 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13976 ; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma
13977 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29
13978 ; RV64ZVE32F-NEXT: .LBB108_44: # %else86
13979 ; RV64ZVE32F-NEXT: slli a2, a1, 33
13980 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
13981 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
13982 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_46
13983 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88
13984 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
13985 ; RV64ZVE32F-NEXT: add a2, a0, a2
13986 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
13987 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
13988 ; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma
13989 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30
13990 ; RV64ZVE32F-NEXT: .LBB108_46: # %else89
13991 ; RV64ZVE32F-NEXT: lui a2, 524288
13992 ; RV64ZVE32F-NEXT: and a1, a1, a2
13993 ; RV64ZVE32F-NEXT: beqz a1, .LBB108_48
13994 ; RV64ZVE32F-NEXT: # %bb.47: # %cond.load91
13995 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13996 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
13997 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
13998 ; RV64ZVE32F-NEXT: add a0, a0, a1
13999 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
14000 ; RV64ZVE32F-NEXT: li a1, 32
14001 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
14002 ; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma
14003 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
14004 ; RV64ZVE32F-NEXT: .LBB108_48: # %else92
14005 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
14006 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
14007 ; RV64ZVE32F-NEXT: ret
14008 ; RV64ZVE32F-NEXT: .LBB108_49: # %cond.load4
14009 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
14010 ; RV64ZVE32F-NEXT: add a2, a0, a2
14011 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14012 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
14013 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
14014 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
14015 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14016 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_6
14017 ; RV64ZVE32F-NEXT: .LBB108_50: # %cond.load7
14018 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
14019 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
14020 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
14021 ; RV64ZVE32F-NEXT: add a2, a0, a2
14022 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14023 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14024 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
14025 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
14026 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14027 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_7
14028 ; RV64ZVE32F-NEXT: j .LBB108_8
14029 ; RV64ZVE32F-NEXT: .LBB108_51: # %cond.load16
14030 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
14031 ; RV64ZVE32F-NEXT: add a2, a0, a2
14032 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14033 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
14034 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
14035 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
14036 ; RV64ZVE32F-NEXT: andi a2, a1, 128
14037 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_12
14038 ; RV64ZVE32F-NEXT: .LBB108_52: # %cond.load19
14039 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
14040 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
14041 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
14042 ; RV64ZVE32F-NEXT: add a2, a0, a2
14043 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14044 ; RV64ZVE32F-NEXT: vmv.s.x v13, a2
14045 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
14046 ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7
14047 ; RV64ZVE32F-NEXT: andi a2, a1, 256
14048 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_13
14049 ; RV64ZVE32F-NEXT: .LBB108_53: # %cond.load22
14050 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
14051 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
14052 ; RV64ZVE32F-NEXT: add a2, a0, a2
14053 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14054 ; RV64ZVE32F-NEXT: vmv.s.x v13, a2
14055 ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8
14056 ; RV64ZVE32F-NEXT: andi a2, a1, 512
14057 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_14
14058 ; RV64ZVE32F-NEXT: j .LBB108_15
14059 ; RV64ZVE32F-NEXT: .LBB108_54: # %cond.load40
14060 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
14061 ; RV64ZVE32F-NEXT: add a2, a0, a2
14062 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14063 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14064 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
14065 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14
14066 ; RV64ZVE32F-NEXT: slli a2, a1, 48
14067 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_25
14068 ; RV64ZVE32F-NEXT: .LBB108_55: # %cond.load43
14069 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
14070 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
14071 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
14072 ; RV64ZVE32F-NEXT: add a2, a0, a2
14073 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14074 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14075 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma
14076 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15
14077 ; RV64ZVE32F-NEXT: slli a2, a1, 47
14078 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_26
14079 ; RV64ZVE32F-NEXT: .LBB108_56: # %cond.load46
14080 ; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma
14081 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
14082 ; RV64ZVE32F-NEXT: add a2, a0, a2
14083 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14084 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14085 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
14086 ; RV64ZVE32F-NEXT: slli a2, a1, 46
14087 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_27
14088 ; RV64ZVE32F-NEXT: j .LBB108_28
14089 ; RV64ZVE32F-NEXT: .LBB108_57: # %cond.load52
14090 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
14091 ; RV64ZVE32F-NEXT: add a2, a0, a2
14092 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14093 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
14094 ; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma
14095 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
14096 ; RV64ZVE32F-NEXT: slli a2, a1, 44
14097 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_30
14098 ; RV64ZVE32F-NEXT: .LBB108_58: # %cond.load55
14099 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
14100 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
14101 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
14102 ; RV64ZVE32F-NEXT: add a2, a0, a2
14103 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14104 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14105 ; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma
14106 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19
14107 ; RV64ZVE32F-NEXT: slli a2, a1, 43
14108 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_31
14109 ; RV64ZVE32F-NEXT: j .LBB108_32
14110 ; RV64ZVE32F-NEXT: .LBB108_59: # %cond.load64
14111 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
14112 ; RV64ZVE32F-NEXT: add a2, a0, a2
14113 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14114 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14115 ; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma
14116 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
14117 ; RV64ZVE32F-NEXT: slli a2, a1, 40
14118 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_36
14119 ; RV64ZVE32F-NEXT: .LBB108_60: # %cond.load67
14120 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
14121 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
14122 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
14123 ; RV64ZVE32F-NEXT: add a2, a0, a2
14124 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14125 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14126 ; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma
14127 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
14128 ; RV64ZVE32F-NEXT: slli a2, a1, 39
14129 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_37
14130 ; RV64ZVE32F-NEXT: .LBB108_61: # %cond.load70
14131 ; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma
14132 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
14133 ; RV64ZVE32F-NEXT: add a2, a0, a2
14134 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14135 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14136 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
14137 ; RV64ZVE32F-NEXT: slli a2, a1, 38
14138 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_38
14139 ; RV64ZVE32F-NEXT: j .LBB108_39
14140 ; RV64ZVE32F-NEXT: .LBB108_62: # %cond.load76
14141 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
14142 ; RV64ZVE32F-NEXT: add a2, a0, a2
14143 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14144 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14145 ; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
14146 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
14147 ; RV64ZVE32F-NEXT: slli a2, a1, 36
14148 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_41
14149 ; RV64ZVE32F-NEXT: .LBB108_63: # %cond.load79
14150 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
14151 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
14152 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
14153 ; RV64ZVE32F-NEXT: add a2, a0, a2
14154 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14155 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14156 ; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma
14157 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
14158 ; RV64ZVE32F-NEXT: slli a2, a1, 35
14159 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_42
14160 ; RV64ZVE32F-NEXT: .LBB108_64: # %cond.load82
14161 ; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma
14162 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
14163 ; RV64ZVE32F-NEXT: add a2, a0, a2
14164 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
14165 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
14166 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
14167 ; RV64ZVE32F-NEXT: slli a2, a1, 34
14168 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_43
14169 ; RV64ZVE32F-NEXT: j .LBB108_44
14170 %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
14171 %v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
14172 ret <32 x i8> %v
14173 }
14176 define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
14177 ; CHECK-LABEL: mgather_broadcast_load_unmasked:
14179 ; CHECK-NEXT: lw a0, 0(a0)
14180 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14181 ; CHECK-NEXT: vmv.v.x v8, a0
14183 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
14184 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14185 ret <4 x i32> %v
14186 }
14188 ; Same as previous, but use an explicit splat instead of splat-via-gep
14189 define <4 x i32> @mgather_broadcast_load_unmasked2(ptr %base) {
14190 ; CHECK-LABEL: mgather_broadcast_load_unmasked2:
14192 ; CHECK-NEXT: lw a0, 0(a0)
14193 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14194 ; CHECK-NEXT: vmv.v.x v8, a0
14196 %ptrhead = insertelement <4 x ptr> poison, ptr %base, i32 0
14197 %ptrs = shufflevector <4 x ptr> %ptrhead, <4 x ptr> poison, <4 x i32> zeroinitializer
14198 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14199 ret <4 x i32> %v
14200 }
14202 define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
14203 ; CHECK-LABEL: mgather_broadcast_load_masked:
14205 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14206 ; CHECK-NEXT: vlse32.v v8, (a0), zero, v0.t
14208 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
14209 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> poison)
14210 ret <4 x i32> %v
14211 }
14213 define <4 x i32> @mgather_unit_stride_load(ptr %base) {
14214 ; CHECK-LABEL: mgather_unit_stride_load:
14216 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14217 ; CHECK-NEXT: vle32.v v8, (a0)
14219 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14220 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14221 ret <4 x i32> %v
14222 }
14224 define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
14225 ; CHECK-LABEL: mgather_unit_stride_load_with_offset:
14227 ; CHECK-NEXT: addi a0, a0, 16
14228 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14229 ; CHECK-NEXT: vle32.v v8, (a0)
14231 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
14232 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14233 ret <4 x i32> %v
14234 }
14236 define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
14237 ; CHECK-LABEL: mgather_unit_stride_load_narrow_idx:
14239 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14240 ; CHECK-NEXT: vle32.v v8, (a0)
14242 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 1, i8 2, i8 3>
14243 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14244 ret <4 x i32> %v
14245 }
14247 define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
14248 ; CHECK-LABEL: mgather_unit_stride_load_wide_idx:
14250 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14251 ; CHECK-NEXT: vle32.v v8, (a0)
14253 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i128> <i128 0, i128 1, i128 2, i128 3>
14254 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14255 ret <4 x i32> %v
14256 }
14258 ; This looks like a strided load (at i8), but isn't at index type.
14259 define <4 x i32> @mgather_narrow_edge_case(ptr %base) {
14260 ; RV32-LABEL: mgather_narrow_edge_case:
14262 ; RV32-NEXT: li a1, -512
14263 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14264 ; RV32-NEXT: vmv.v.i v0, 5
14265 ; RV32-NEXT: vmv.v.x v8, a1
14266 ; RV32-NEXT: vmerge.vim v8, v8, 0, v0
14267 ; RV32-NEXT: vluxei32.v v8, (a0), v8
14270 ; RV64V-LABEL: mgather_narrow_edge_case:
14272 ; RV64V-NEXT: li a1, -512
14273 ; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
14274 ; RV64V-NEXT: vmv.v.i v0, 5
14275 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
14276 ; RV64V-NEXT: vmv.v.x v8, a1
14277 ; RV64V-NEXT: vmerge.vim v10, v8, 0, v0
14278 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
14279 ; RV64V-NEXT: vluxei64.v v8, (a0), v10
14282 ; RV64ZVE32F-LABEL: mgather_narrow_edge_case:
14283 ; RV64ZVE32F: # %bb.0:
14284 ; RV64ZVE32F-NEXT: lw a1, -512(a0)
14285 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
14286 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14287 ; RV64ZVE32F-NEXT: vmv.v.i v0, 5
14288 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
14289 ; RV64ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
14290 ; RV64ZVE32F-NEXT: ret
14291 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 128, i8 0, i8 128>
14292 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
14293 ret <4 x i32> %v
14294 }
14296 define <8 x i16> @mgather_strided_unaligned(ptr %base) {
14297 ; RV32-LABEL: mgather_strided_unaligned:
14299 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
14300 ; RV32-NEXT: vid.v v8
14301 ; RV32-NEXT: vsll.vi v8, v8, 2
14302 ; RV32-NEXT: vadd.vx v8, v8, a0
14303 ; RV32-NEXT: vmv.x.s a0, v8
14304 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
14305 ; RV32-NEXT: vslidedown.vi v10, v8, 1
14306 ; RV32-NEXT: vslidedown.vi v11, v8, 2
14307 ; RV32-NEXT: vmv.x.s a1, v10
14308 ; RV32-NEXT: vslidedown.vi v10, v8, 3
14309 ; RV32-NEXT: vmv.x.s a2, v11
14310 ; RV32-NEXT: vmv.x.s a3, v10
14311 ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
14312 ; RV32-NEXT: vslidedown.vi v10, v8, 4
14313 ; RV32-NEXT: vmv.x.s a4, v10
14314 ; RV32-NEXT: vslidedown.vi v10, v8, 5
14315 ; RV32-NEXT: vmv.x.s a5, v10
14316 ; RV32-NEXT: vslidedown.vi v10, v8, 6
14317 ; RV32-NEXT: vslidedown.vi v8, v8, 7
14318 ; RV32-NEXT: lbu a6, 0(a0)
14319 ; RV32-NEXT: lbu a0, 1(a0)
14320 ; RV32-NEXT: vmv.x.s a7, v10
14321 ; RV32-NEXT: vmv.x.s t0, v8
14322 ; RV32-NEXT: lbu t1, 0(a1)
14323 ; RV32-NEXT: lbu a1, 1(a1)
14324 ; RV32-NEXT: lbu t2, 0(a2)
14325 ; RV32-NEXT: lbu a2, 1(a2)
14326 ; RV32-NEXT: slli a0, a0, 8
14327 ; RV32-NEXT: or a0, a0, a6
14328 ; RV32-NEXT: lbu a6, 0(a3)
14329 ; RV32-NEXT: lbu a3, 1(a3)
14330 ; RV32-NEXT: slli a1, a1, 8
14331 ; RV32-NEXT: or a1, a1, t1
14332 ; RV32-NEXT: lbu t1, 0(a4)
14333 ; RV32-NEXT: lbu a4, 1(a4)
14334 ; RV32-NEXT: slli a2, a2, 8
14335 ; RV32-NEXT: or a2, a2, t2
14336 ; RV32-NEXT: lbu t2, 0(a5)
14337 ; RV32-NEXT: lbu a5, 1(a5)
14338 ; RV32-NEXT: slli a3, a3, 8
14339 ; RV32-NEXT: or a3, a3, a6
14340 ; RV32-NEXT: lbu a6, 0(a7)
14341 ; RV32-NEXT: lbu a7, 1(a7)
14342 ; RV32-NEXT: slli a4, a4, 8
14343 ; RV32-NEXT: or a4, a4, t1
14344 ; RV32-NEXT: lbu t1, 0(t0)
14345 ; RV32-NEXT: lbu t0, 1(t0)
14346 ; RV32-NEXT: slli a5, a5, 8
14347 ; RV32-NEXT: or a5, a5, t2
14348 ; RV32-NEXT: slli a7, a7, 8
14349 ; RV32-NEXT: or a6, a7, a6
14350 ; RV32-NEXT: slli t0, t0, 8
14351 ; RV32-NEXT: or a7, t0, t1
14352 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14353 ; RV32-NEXT: vmv.v.x v8, a0
14354 ; RV32-NEXT: vslide1down.vx v8, v8, a1
14355 ; RV32-NEXT: vmv.v.x v9, a4
14356 ; RV32-NEXT: vslide1down.vx v8, v8, a2
14357 ; RV32-NEXT: vslide1down.vx v9, v9, a5
14358 ; RV32-NEXT: vslide1down.vx v10, v8, a3
14359 ; RV32-NEXT: vslide1down.vx v8, v9, a6
14360 ; RV32-NEXT: vmv.v.i v0, 15
14361 ; RV32-NEXT: vslide1down.vx v8, v8, a7
14362 ; RV32-NEXT: vslidedown.vi v8, v10, 4, v0.t
14365 ; RV64V-LABEL: mgather_strided_unaligned:
14367 ; RV64V-NEXT: addi sp, sp, -128
14368 ; RV64V-NEXT: .cfi_def_cfa_offset 128
14369 ; RV64V-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
14370 ; RV64V-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
14371 ; RV64V-NEXT: .cfi_offset ra, -8
14372 ; RV64V-NEXT: .cfi_offset s0, -16
14373 ; RV64V-NEXT: addi s0, sp, 128
14374 ; RV64V-NEXT: .cfi_def_cfa s0, 0
14375 ; RV64V-NEXT: andi sp, sp, -64
14376 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
14377 ; RV64V-NEXT: vid.v v8
14378 ; RV64V-NEXT: mv a1, sp
14379 ; RV64V-NEXT: vsll.vi v8, v8, 2
14380 ; RV64V-NEXT: vadd.vx v8, v8, a0
14381 ; RV64V-NEXT: vmv.x.s a0, v8
14382 ; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
14383 ; RV64V-NEXT: vslidedown.vi v12, v8, 1
14384 ; RV64V-NEXT: vmv.x.s a2, v12
14385 ; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
14386 ; RV64V-NEXT: vslidedown.vi v12, v8, 2
14387 ; RV64V-NEXT: vmv.x.s a3, v12
14388 ; RV64V-NEXT: vslidedown.vi v12, v8, 3
14389 ; RV64V-NEXT: lbu a4, 0(a0)
14390 ; RV64V-NEXT: lbu a0, 1(a0)
14391 ; RV64V-NEXT: vmv.x.s a5, v12
14392 ; RV64V-NEXT: lbu a6, 0(a2)
14393 ; RV64V-NEXT: lbu a2, 1(a2)
14394 ; RV64V-NEXT: lbu a7, 0(a3)
14395 ; RV64V-NEXT: lbu a3, 1(a3)
14396 ; RV64V-NEXT: lbu t0, 0(a5)
14397 ; RV64V-NEXT: lbu a5, 1(a5)
14398 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14399 ; RV64V-NEXT: vse64.v v8, (a1)
14400 ; RV64V-NEXT: slli a0, a0, 8
14401 ; RV64V-NEXT: or a0, a0, a4
14402 ; RV64V-NEXT: slli a2, a2, 8
14403 ; RV64V-NEXT: slli a3, a3, 8
14404 ; RV64V-NEXT: or a1, a2, a6
14405 ; RV64V-NEXT: or a2, a3, a7
14406 ; RV64V-NEXT: ld a3, 32(sp)
14407 ; RV64V-NEXT: ld a4, 40(sp)
14408 ; RV64V-NEXT: ld a6, 48(sp)
14409 ; RV64V-NEXT: ld a7, 56(sp)
14410 ; RV64V-NEXT: slli a5, a5, 8
14411 ; RV64V-NEXT: or a5, a5, t0
14412 ; RV64V-NEXT: lbu t0, 0(a3)
14413 ; RV64V-NEXT: lbu a3, 1(a3)
14414 ; RV64V-NEXT: vmv.v.x v8, a0
14415 ; RV64V-NEXT: lbu a0, 0(a4)
14416 ; RV64V-NEXT: lbu a4, 1(a4)
14417 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
14418 ; RV64V-NEXT: lbu a1, 0(a6)
14419 ; RV64V-NEXT: lbu a6, 1(a6)
14420 ; RV64V-NEXT: vslide1down.vx v8, v8, a2
14421 ; RV64V-NEXT: lbu a2, 0(a7)
14422 ; RV64V-NEXT: lbu a7, 1(a7)
14423 ; RV64V-NEXT: vslide1down.vx v9, v8, a5
14424 ; RV64V-NEXT: slli a3, a3, 8
14425 ; RV64V-NEXT: slli a4, a4, 8
14426 ; RV64V-NEXT: slli a6, a6, 8
14427 ; RV64V-NEXT: slli a7, a7, 8
14428 ; RV64V-NEXT: or a3, a3, t0
14429 ; RV64V-NEXT: or a0, a4, a0
14430 ; RV64V-NEXT: or a1, a6, a1
14431 ; RV64V-NEXT: or a2, a7, a2
14432 ; RV64V-NEXT: vmv.v.x v8, a3
14433 ; RV64V-NEXT: vslide1down.vx v8, v8, a0
14434 ; RV64V-NEXT: vslide1down.vx v8, v8, a1
14435 ; RV64V-NEXT: vmv.v.i v0, 15
14436 ; RV64V-NEXT: vslide1down.vx v8, v8, a2
14437 ; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t
14438 ; RV64V-NEXT: addi sp, s0, -128
14439 ; RV64V-NEXT: .cfi_def_cfa sp, 128
14440 ; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
14441 ; RV64V-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
14442 ; RV64V-NEXT: .cfi_restore ra
14443 ; RV64V-NEXT: .cfi_restore s0
14444 ; RV64V-NEXT: addi sp, sp, 128
14445 ; RV64V-NEXT: .cfi_def_cfa_offset 0
14448 ; RV64ZVE32F-LABEL: mgather_strided_unaligned:
14449 ; RV64ZVE32F: # %bb.0:
14450 ; RV64ZVE32F-NEXT: lbu a1, 0(a0)
14451 ; RV64ZVE32F-NEXT: lbu a2, 1(a0)
14452 ; RV64ZVE32F-NEXT: lbu a3, 4(a0)
14453 ; RV64ZVE32F-NEXT: lbu a4, 5(a0)
14454 ; RV64ZVE32F-NEXT: lbu a5, 8(a0)
14455 ; RV64ZVE32F-NEXT: lbu a6, 9(a0)
14456 ; RV64ZVE32F-NEXT: lbu a7, 12(a0)
14457 ; RV64ZVE32F-NEXT: lbu t0, 13(a0)
14458 ; RV64ZVE32F-NEXT: slli a2, a2, 8
14459 ; RV64ZVE32F-NEXT: slli a4, a4, 8
14460 ; RV64ZVE32F-NEXT: or a1, a2, a1
14461 ; RV64ZVE32F-NEXT: or a3, a4, a3
14462 ; RV64ZVE32F-NEXT: lbu a2, 16(a0)
14463 ; RV64ZVE32F-NEXT: lbu a4, 17(a0)
14464 ; RV64ZVE32F-NEXT: lbu t1, 20(a0)
14465 ; RV64ZVE32F-NEXT: lbu t2, 21(a0)
14466 ; RV64ZVE32F-NEXT: slli a6, a6, 8
14467 ; RV64ZVE32F-NEXT: or a5, a6, a5
14468 ; RV64ZVE32F-NEXT: slli t0, t0, 8
14469 ; RV64ZVE32F-NEXT: slli a4, a4, 8
14470 ; RV64ZVE32F-NEXT: slli t2, t2, 8
14471 ; RV64ZVE32F-NEXT: or a6, t0, a7
14472 ; RV64ZVE32F-NEXT: or a2, a4, a2
14473 ; RV64ZVE32F-NEXT: lbu a4, 24(a0)
14474 ; RV64ZVE32F-NEXT: lbu a7, 25(a0)
14475 ; RV64ZVE32F-NEXT: or t0, t2, t1
14476 ; RV64ZVE32F-NEXT: lbu t1, 28(a0)
14477 ; RV64ZVE32F-NEXT: lbu a0, 29(a0)
14478 ; RV64ZVE32F-NEXT: slli a7, a7, 8
14479 ; RV64ZVE32F-NEXT: or a4, a7, a4
14480 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14481 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14482 ; RV64ZVE32F-NEXT: slli a0, a0, 8
14483 ; RV64ZVE32F-NEXT: or a0, a0, t1
14484 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
14485 ; RV64ZVE32F-NEXT: vmv.v.x v9, a2
14486 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
14487 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, t0
14488 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
14489 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
14490 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a6
14491 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
14492 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14493 ; RV64ZVE32F-NEXT: ret
14494 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
14495 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> splat (i1 true), <8 x i16> poison)
14496 ret <8 x i16> %v
14497 }
14499 ; TODO: Recognize as strided load with SEW=32
14500 define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
14501 ; RV32-LABEL: mgather_strided_2xSEW:
14503 ; RV32-NEXT: li a1, 8
14504 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14505 ; RV32-NEXT: vlse32.v v8, (a0), a1
14508 ; RV64V-LABEL: mgather_strided_2xSEW:
14510 ; RV64V-NEXT: li a1, 8
14511 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14512 ; RV64V-NEXT: vlse32.v v8, (a0), a1
14515 ; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
14516 ; RV64ZVE32F: # %bb.0:
14517 ; RV64ZVE32F-NEXT: lh a1, 0(a0)
14518 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14519 ; RV64ZVE32F-NEXT: lh a3, 8(a0)
14520 ; RV64ZVE32F-NEXT: lh a4, 10(a0)
14521 ; RV64ZVE32F-NEXT: lh a5, 16(a0)
14522 ; RV64ZVE32F-NEXT: lh a6, 18(a0)
14523 ; RV64ZVE32F-NEXT: lh a7, 24(a0)
14524 ; RV64ZVE32F-NEXT: lh a0, 26(a0)
14525 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14526 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14527 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
14528 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5
14529 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
14530 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
14531 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
14532 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
14533 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
14534 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
14535 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14536 ; RV64ZVE32F-NEXT: ret
14537 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
14538 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14539 ret <8 x i16> %v
14540 }
14542 ; TODO: Recognize as strided load with SEW=32
14543 define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
14544 ; RV32-LABEL: mgather_strided_2xSEW_with_offset:
14546 ; RV32-NEXT: addi a0, a0, 4
14547 ; RV32-NEXT: li a1, 8
14548 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14549 ; RV32-NEXT: vlse32.v v8, (a0), a1
14552 ; RV64V-LABEL: mgather_strided_2xSEW_with_offset:
14554 ; RV64V-NEXT: addi a0, a0, 4
14555 ; RV64V-NEXT: li a1, 8
14556 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14557 ; RV64V-NEXT: vlse32.v v8, (a0), a1
14560 ; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset:
14561 ; RV64ZVE32F: # %bb.0:
14562 ; RV64ZVE32F-NEXT: lh a1, 4(a0)
14563 ; RV64ZVE32F-NEXT: lh a2, 6(a0)
14564 ; RV64ZVE32F-NEXT: lh a3, 12(a0)
14565 ; RV64ZVE32F-NEXT: lh a4, 14(a0)
14566 ; RV64ZVE32F-NEXT: lh a5, 20(a0)
14567 ; RV64ZVE32F-NEXT: lh a6, 22(a0)
14568 ; RV64ZVE32F-NEXT: lh a7, 28(a0)
14569 ; RV64ZVE32F-NEXT: lh a0, 30(a0)
14570 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14571 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14572 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
14573 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5
14574 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
14575 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
14576 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
14577 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
14578 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
14579 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
14580 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14581 ; RV64ZVE32F-NEXT: ret
14582 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 2, i64 3, i64 6, i64 7, i64 10, i64 11, i64 14, i64 15>
14583 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14584 ret <8 x i16> %v
14585 }
14587 ; TODO: Recognize as strided load with SEW=32
14588 define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
14589 ; RV32-LABEL: mgather_reverse_unit_strided_2xSEW:
14591 ; RV32-NEXT: addi a0, a0, 28
14592 ; RV32-NEXT: li a1, -4
14593 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14594 ; RV32-NEXT: vlse32.v v8, (a0), a1
14597 ; RV64V-LABEL: mgather_reverse_unit_strided_2xSEW:
14599 ; RV64V-NEXT: addi a0, a0, 28
14600 ; RV64V-NEXT: li a1, -4
14601 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14602 ; RV64V-NEXT: vlse32.v v8, (a0), a1
14605 ; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW:
14606 ; RV64ZVE32F: # %bb.0:
14607 ; RV64ZVE32F-NEXT: lh a1, 24(a0)
14608 ; RV64ZVE32F-NEXT: lh a2, 26(a0)
14609 ; RV64ZVE32F-NEXT: lh a3, 28(a0)
14610 ; RV64ZVE32F-NEXT: lh a4, 30(a0)
14611 ; RV64ZVE32F-NEXT: lh a5, 16(a0)
14612 ; RV64ZVE32F-NEXT: lh a6, 18(a0)
14613 ; RV64ZVE32F-NEXT: lh a7, 20(a0)
14614 ; RV64ZVE32F-NEXT: lh a0, 22(a0)
14615 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14616 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14617 ; RV64ZVE32F-NEXT: vmv.v.x v8, a3
14618 ; RV64ZVE32F-NEXT: vmv.v.x v9, a7
14619 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
14620 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0
14621 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
14622 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5
14623 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2
14624 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6
14625 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14626 ; RV64ZVE32F-NEXT: ret
14627 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9>
14628 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14629 ret <8 x i16> %v
14630 }
14632 ; TODO: Recognize as strided load with SEW=32
14633 define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
14634 ; RV32-LABEL: mgather_reverse_strided_2xSEW:
14636 ; RV32-NEXT: addi a0, a0, 28
14637 ; RV32-NEXT: li a1, -8
14638 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14639 ; RV32-NEXT: vlse32.v v8, (a0), a1
14642 ; RV64V-LABEL: mgather_reverse_strided_2xSEW:
14644 ; RV64V-NEXT: addi a0, a0, 28
14645 ; RV64V-NEXT: li a1, -8
14646 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14647 ; RV64V-NEXT: vlse32.v v8, (a0), a1
14650 ; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW:
14651 ; RV64ZVE32F: # %bb.0:
14652 ; RV64ZVE32F-NEXT: lh a1, 20(a0)
14653 ; RV64ZVE32F-NEXT: lh a2, 22(a0)
14654 ; RV64ZVE32F-NEXT: lh a3, 28(a0)
14655 ; RV64ZVE32F-NEXT: lh a4, 30(a0)
14656 ; RV64ZVE32F-NEXT: lh a5, 4(a0)
14657 ; RV64ZVE32F-NEXT: lh a6, 6(a0)
14658 ; RV64ZVE32F-NEXT: lh a7, 12(a0)
14659 ; RV64ZVE32F-NEXT: lh a0, 14(a0)
14660 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14661 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14662 ; RV64ZVE32F-NEXT: vmv.v.x v8, a3
14663 ; RV64ZVE32F-NEXT: vmv.v.x v9, a7
14664 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
14665 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0
14666 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
14667 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5
14668 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2
14669 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6
14670 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14671 ; RV64ZVE32F-NEXT: ret
14672 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3>
14673 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14674 ret <8 x i16> %v
14675 }
14677 define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
14678 ; RV32-LABEL: mgather_gather_2xSEW:
14680 ; RV32-NEXT: lui a1, 16513
14681 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14682 ; RV32-NEXT: vmv.s.x v9, a1
14683 ; RV32-NEXT: vluxei8.v v8, (a0), v9
14686 ; RV64V-LABEL: mgather_gather_2xSEW:
14688 ; RV64V-NEXT: lui a1, 16513
14689 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14690 ; RV64V-NEXT: vmv.s.x v9, a1
14691 ; RV64V-NEXT: vluxei8.v v8, (a0), v9
14694 ; RV64ZVE32F-LABEL: mgather_gather_2xSEW:
14695 ; RV64ZVE32F: # %bb.0:
14696 ; RV64ZVE32F-NEXT: lh a1, 8(a0)
14697 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
14698 ; RV64ZVE32F-NEXT: lh a3, 16(a0)
14699 ; RV64ZVE32F-NEXT: lh a4, 18(a0)
14700 ; RV64ZVE32F-NEXT: lh a5, 0(a0)
14701 ; RV64ZVE32F-NEXT: lh a6, 2(a0)
14702 ; RV64ZVE32F-NEXT: lh a7, 4(a0)
14703 ; RV64ZVE32F-NEXT: lh a0, 6(a0)
14704 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14705 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14706 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5
14707 ; RV64ZVE32F-NEXT: vmv.v.x v9, a1
14708 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
14709 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
14710 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
14711 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
14712 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
14713 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
14714 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14715 ; RV64ZVE32F-NEXT: ret
14716 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
14717 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14718 ret <8 x i16> %v
14719 }
14721 ; Base pointer isn't sufficiently aligned to form gather with e32
14722 define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
14723 ; RV32-LABEL: mgather_gather_2xSEW_unaligned:
14725 ; RV32-NEXT: lui a1, %hi(.LCPI123_0)
14726 ; RV32-NEXT: addi a1, a1, %lo(.LCPI123_0)
14727 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14728 ; RV32-NEXT: vle8.v v9, (a1)
14729 ; RV32-NEXT: vluxei8.v v8, (a0), v9
14732 ; RV64V-LABEL: mgather_gather_2xSEW_unaligned:
14734 ; RV64V-NEXT: lui a1, %hi(.LCPI123_0)
14735 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI123_0)
14736 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14737 ; RV64V-NEXT: vle8.v v9, (a1)
14738 ; RV64V-NEXT: vluxei8.v v8, (a0), v9
14741 ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned:
14742 ; RV64ZVE32F: # %bb.0:
14743 ; RV64ZVE32F-NEXT: lh a1, 8(a0)
14744 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
14745 ; RV64ZVE32F-NEXT: lh a3, 18(a0)
14746 ; RV64ZVE32F-NEXT: lh a4, 20(a0)
14747 ; RV64ZVE32F-NEXT: lh a5, 0(a0)
14748 ; RV64ZVE32F-NEXT: lh a6, 2(a0)
14749 ; RV64ZVE32F-NEXT: lh a7, 4(a0)
14750 ; RV64ZVE32F-NEXT: lh a0, 6(a0)
14751 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14752 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14753 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5
14754 ; RV64ZVE32F-NEXT: vmv.v.x v9, a1
14755 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
14756 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
14757 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
14758 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
14759 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
14760 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
14761 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14762 ; RV64ZVE32F-NEXT: ret
14763 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
14764 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true), <8 x i16> poison)
14765 ret <8 x i16> %v
14766 }
14768 ; Despite sufficient starting alignment, the index values aren't properly
14769 ; aligned for e32.
14770 define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
14771 ; RV32-LABEL: mgather_gather_2xSEW_unaligned2:
14773 ; RV32-NEXT: lui a1, %hi(.LCPI124_0)
14774 ; RV32-NEXT: addi a1, a1, %lo(.LCPI124_0)
14775 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14776 ; RV32-NEXT: vle8.v v9, (a1)
14777 ; RV32-NEXT: vluxei8.v v8, (a0), v9
14780 ; RV64V-LABEL: mgather_gather_2xSEW_unaligned2:
14782 ; RV64V-NEXT: lui a1, %hi(.LCPI124_0)
14783 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI124_0)
14784 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14785 ; RV64V-NEXT: vle8.v v9, (a1)
14786 ; RV64V-NEXT: vluxei8.v v8, (a0), v9
14789 ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2:
14790 ; RV64ZVE32F: # %bb.0:
14791 ; RV64ZVE32F-NEXT: lh a1, 2(a0)
14792 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14793 ; RV64ZVE32F-NEXT: lh a3, 6(a0)
14794 ; RV64ZVE32F-NEXT: lh a4, 8(a0)
14795 ; RV64ZVE32F-NEXT: lh a5, 10(a0)
14796 ; RV64ZVE32F-NEXT: lh a6, 18(a0)
14797 ; RV64ZVE32F-NEXT: lh a0, 20(a0)
14798 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14799 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14800 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
14801 ; RV64ZVE32F-NEXT: vmv.v.x v9, a4
14802 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
14803 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5
14804 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
14805 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
14806 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a0
14807 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a3
14808 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14809 ; RV64ZVE32F-NEXT: ret
14810 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
14811 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14812 ret <8 x i16> %v
14813 }
14815 define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
14816 ; RV32V-LABEL: mgather_gather_4xSEW:
14818 ; RV32V-NEXT: li a1, 16
14819 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
14820 ; RV32V-NEXT: vlse64.v v8, (a0), a1
14823 ; RV64V-LABEL: mgather_gather_4xSEW:
14825 ; RV64V-NEXT: li a1, 16
14826 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
14827 ; RV64V-NEXT: vlse64.v v8, (a0), a1
14830 ; RV32ZVE32F-LABEL: mgather_gather_4xSEW:
14831 ; RV32ZVE32F: # %bb.0:
14832 ; RV32ZVE32F-NEXT: lui a1, 82176
14833 ; RV32ZVE32F-NEXT: addi a1, a1, 1024
14834 ; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14835 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
14836 ; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9
14837 ; RV32ZVE32F-NEXT: ret
14839 ; RV64ZVE32F-LABEL: mgather_gather_4xSEW:
14840 ; RV64ZVE32F: # %bb.0:
14841 ; RV64ZVE32F-NEXT: lh a1, 0(a0)
14842 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14843 ; RV64ZVE32F-NEXT: lh a3, 4(a0)
14844 ; RV64ZVE32F-NEXT: lh a4, 6(a0)
14845 ; RV64ZVE32F-NEXT: lh a5, 16(a0)
14846 ; RV64ZVE32F-NEXT: lh a6, 18(a0)
14847 ; RV64ZVE32F-NEXT: lh a7, 20(a0)
14848 ; RV64ZVE32F-NEXT: lh a0, 22(a0)
14849 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14850 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14851 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
14852 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5
14853 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
14854 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
14855 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
14856 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
14857 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
14858 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
14859 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14860 ; RV64ZVE32F-NEXT: ret
14861 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
14862 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> splat (i1 true), <8 x i16> poison)
14863 ret <8 x i16> %v
14864 }
14866 ; This is a case where we'd be able to do 4xSEW if we had proper alignment
14867 ; but we only have sufficient alignment for 2xSEW.
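; The indices match the 4xSEW case above, but the gather only guarantees
; 4 byte alignment, so RV32 and RV64V form only e32 (2xSEW) accesses.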
14868 define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
14869 ; RV32-LABEL: mgather_gather_4xSEW_partial_align:
14871 ; RV32-NEXT: lui a1, 82176
14872 ; RV32-NEXT: addi a1, a1, 1024
14873 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14874 ; RV32-NEXT: vmv.s.x v9, a1
14875 ; RV32-NEXT: vluxei8.v v8, (a0), v9
14878 ; RV64V-LABEL: mgather_gather_4xSEW_partial_align:
14880 ; RV64V-NEXT: lui a1, 82176
14881 ; RV64V-NEXT: addi a1, a1, 1024
14882 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14883 ; RV64V-NEXT: vmv.s.x v9, a1
14884 ; RV64V-NEXT: vluxei8.v v8, (a0), v9
14887 ; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align:
14888 ; RV64ZVE32F: # %bb.0:
14889 ; RV64ZVE32F-NEXT: lh a1, 0(a0)
14890 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14891 ; RV64ZVE32F-NEXT: lh a3, 4(a0)
14892 ; RV64ZVE32F-NEXT: lh a4, 6(a0)
14893 ; RV64ZVE32F-NEXT: lh a5, 16(a0)
14894 ; RV64ZVE32F-NEXT: lh a6, 18(a0)
14895 ; RV64ZVE32F-NEXT: lh a7, 20(a0)
14896 ; RV64ZVE32F-NEXT: lh a0, 22(a0)
14897 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14898 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14899 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
14900 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5
14901 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
14902 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
14903 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
14904 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
14905 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
14906 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
14907 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14908 ; RV64ZVE32F-NEXT: ret
14909 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
14910 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14911 ret <8 x i16> %v
14912 }
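; A gather of consecutive elements in reverse order becomes a negatively
; strided load (stride -2) starting from the last element.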
14914 define <8 x i16> @mgather_shuffle_reverse(ptr %base) {
14915 ; CHECK-LABEL: mgather_shuffle_reverse:
14917 ; CHECK-NEXT: addi a0, a0, 14
14918 ; CHECK-NEXT: li a1, -2
14919 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14920 ; CHECK-NEXT: vlse16.v v8, (a0), a1
14922 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
14923 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14924 ret <8 x i16> %v
14925 }
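; Rotating two contiguous halves is lowered on the RV32 and RV64V
; configurations to a contiguous vle16 followed by vslidedown/vslideup rather
; than an indexed gather.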
14927 define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
14928 ; RV32-LABEL: mgather_shuffle_rotate:
14930 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14931 ; RV32-NEXT: vle16.v v9, (a0)
14932 ; RV32-NEXT: vslidedown.vi v8, v9, 4
14933 ; RV32-NEXT: vslideup.vi v8, v9, 4
14936 ; RV64V-LABEL: mgather_shuffle_rotate:
14938 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14939 ; RV64V-NEXT: vle16.v v9, (a0)
14940 ; RV64V-NEXT: vslidedown.vi v8, v9, 4
14941 ; RV64V-NEXT: vslideup.vi v8, v9, 4
14944 ; RV64ZVE32F-LABEL: mgather_shuffle_rotate:
14945 ; RV64ZVE32F: # %bb.0:
14946 ; RV64ZVE32F-NEXT: lh a1, 8(a0)
14947 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
14948 ; RV64ZVE32F-NEXT: lh a3, 12(a0)
14949 ; RV64ZVE32F-NEXT: lh a4, 14(a0)
14950 ; RV64ZVE32F-NEXT: lh a5, 0(a0)
14951 ; RV64ZVE32F-NEXT: lh a6, 2(a0)
14952 ; RV64ZVE32F-NEXT: lh a7, 4(a0)
14953 ; RV64ZVE32F-NEXT: lh a0, 6(a0)
14954 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
14955 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
14956 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
14957 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5
14958 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
14959 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
14960 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
14961 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
14962 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
14963 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
14964 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
14965 ; RV64ZVE32F-NEXT: ret
14966 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
14967 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
14968 ret <8 x i16> %v
14969 }
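; A general permutation of contiguous elements becomes, on the RV32 and RV64V
; configurations, a contiguous vle16 feeding a vrgather.vv with a
; constant-pool index vector.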
14971 define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
14972 ; RV32-LABEL: mgather_shuffle_vrgather:
14974 ; RV32-NEXT: lui a1, %hi(.LCPI129_0)
14975 ; RV32-NEXT: addi a1, a1, %lo(.LCPI129_0)
14976 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14977 ; RV32-NEXT: vle16.v v9, (a1)
14978 ; RV32-NEXT: vle16.v v10, (a0)
14979 ; RV32-NEXT: vrgather.vv v8, v10, v9
14982 ; RV64V-LABEL: mgather_shuffle_vrgather:
14984 ; RV64V-NEXT: lui a1, %hi(.LCPI129_0)
14985 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI129_0)
14986 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14987 ; RV64V-NEXT: vle16.v v9, (a1)
14988 ; RV64V-NEXT: vle16.v v10, (a0)
14989 ; RV64V-NEXT: vrgather.vv v8, v10, v9
14992 ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather:
14993 ; RV64ZVE32F: # %bb.0:
14994 ; RV64ZVE32F-NEXT: lh a1, 0(a0)
14995 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14996 ; RV64ZVE32F-NEXT: lh a3, 4(a0)
14997 ; RV64ZVE32F-NEXT: lh a4, 6(a0)
14998 ; RV64ZVE32F-NEXT: lh a5, 8(a0)
14999 ; RV64ZVE32F-NEXT: lh a6, 10(a0)
15000 ; RV64ZVE32F-NEXT: lh a7, 12(a0)
15001 ; RV64ZVE32F-NEXT: lh a0, 14(a0)
15002 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
15003 ; RV64ZVE32F-NEXT: vmv.v.i v0, 15
15004 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
15005 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5
15006 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
15007 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
15008 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
15009 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
15010 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2
15011 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
15012 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
15013 ; RV64ZVE32F-NEXT: ret
15014 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 2, i64 3, i64 1, i64 4, i64 5, i64 6, i64 7>
15015 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
15016 ret <8 x i16> %v
15017 }
15019 ; v32i64 is not a legal type, so make sure we don't try to combine the mgather
15020 ; to a vlse intrinsic until it is legalized and split.
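; After splitting, each half becomes a 16 element strided vlse64 with stride
; 16 on RV32V and RV64V, with the base advanced by 256 bytes for the second
; half.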
15021 define <32 x i64> @mgather_strided_split(ptr %base) {
15022 ; RV32V-LABEL: mgather_strided_split:
15024 ; RV32V-NEXT: li a1, 16
15025 ; RV32V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
15026 ; RV32V-NEXT: vlse64.v v8, (a0), a1
15027 ; RV32V-NEXT: addi a0, a0, 256
15028 ; RV32V-NEXT: vlse64.v v16, (a0), a1
15031 ; RV64V-LABEL: mgather_strided_split:
15033 ; RV64V-NEXT: li a1, 16
15034 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
15035 ; RV64V-NEXT: vlse64.v v8, (a0), a1
15036 ; RV64V-NEXT: addi a0, a0, 256
15037 ; RV64V-NEXT: vlse64.v v16, (a0), a1
15040 ; RV32ZVE32F-LABEL: mgather_strided_split:
15041 ; RV32ZVE32F: # %bb.0:
15042 ; RV32ZVE32F-NEXT: addi sp, sp, -512
15043 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 512
15044 ; RV32ZVE32F-NEXT: sw ra, 508(sp) # 4-byte Folded Spill
15045 ; RV32ZVE32F-NEXT: sw s0, 504(sp) # 4-byte Folded Spill
15046 ; RV32ZVE32F-NEXT: sw s2, 500(sp) # 4-byte Folded Spill
15047 ; RV32ZVE32F-NEXT: sw s3, 496(sp) # 4-byte Folded Spill
15048 ; RV32ZVE32F-NEXT: sw s4, 492(sp) # 4-byte Folded Spill
15049 ; RV32ZVE32F-NEXT: sw s5, 488(sp) # 4-byte Folded Spill
15050 ; RV32ZVE32F-NEXT: sw s6, 484(sp) # 4-byte Folded Spill
15051 ; RV32ZVE32F-NEXT: sw s7, 480(sp) # 4-byte Folded Spill
15052 ; RV32ZVE32F-NEXT: sw s8, 476(sp) # 4-byte Folded Spill
15053 ; RV32ZVE32F-NEXT: sw s9, 472(sp) # 4-byte Folded Spill
15054 ; RV32ZVE32F-NEXT: sw s10, 468(sp) # 4-byte Folded Spill
15055 ; RV32ZVE32F-NEXT: sw s11, 464(sp) # 4-byte Folded Spill
15056 ; RV32ZVE32F-NEXT: .cfi_offset ra, -4
15057 ; RV32ZVE32F-NEXT: .cfi_offset s0, -8
15058 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
15059 ; RV32ZVE32F-NEXT: .cfi_offset s3, -16
15060 ; RV32ZVE32F-NEXT: .cfi_offset s4, -20
15061 ; RV32ZVE32F-NEXT: .cfi_offset s5, -24
15062 ; RV32ZVE32F-NEXT: .cfi_offset s6, -28
15063 ; RV32ZVE32F-NEXT: .cfi_offset s7, -32
15064 ; RV32ZVE32F-NEXT: .cfi_offset s8, -36
15065 ; RV32ZVE32F-NEXT: .cfi_offset s9, -40
15066 ; RV32ZVE32F-NEXT: .cfi_offset s10, -44
15067 ; RV32ZVE32F-NEXT: .cfi_offset s11, -48
15068 ; RV32ZVE32F-NEXT: addi s0, sp, 512
15069 ; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0
15070 ; RV32ZVE32F-NEXT: andi sp, sp, -128
15071 ; RV32ZVE32F-NEXT: li a2, 32
15072 ; RV32ZVE32F-NEXT: lw a3, 0(a1)
15073 ; RV32ZVE32F-NEXT: sw a3, 236(sp) # 4-byte Folded Spill
15074 ; RV32ZVE32F-NEXT: lw a3, 4(a1)
15075 ; RV32ZVE32F-NEXT: sw a3, 232(sp) # 4-byte Folded Spill
15076 ; RV32ZVE32F-NEXT: addi a3, sp, 256
15077 ; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma
15078 ; RV32ZVE32F-NEXT: vid.v v8
15079 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 4
15080 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
15081 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
15082 ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 1
15083 ; RV32ZVE32F-NEXT: vslidedown.vi v17, v8, 2
15084 ; RV32ZVE32F-NEXT: vmv.x.s a1, v16
15085 ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 3
15086 ; RV32ZVE32F-NEXT: vmv.x.s a4, v17
15087 ; RV32ZVE32F-NEXT: vmv.x.s a5, v16
15088 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
15089 ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 4
15090 ; RV32ZVE32F-NEXT: vmv.x.s a6, v16
15091 ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 5
15092 ; RV32ZVE32F-NEXT: vmv.x.s a7, v16
15093 ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 6
15094 ; RV32ZVE32F-NEXT: vmv.x.s t0, v16
15095 ; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 7
15096 ; RV32ZVE32F-NEXT: vmv.x.s t1, v16
15097 ; RV32ZVE32F-NEXT: lw t2, 0(a1)
15098 ; RV32ZVE32F-NEXT: sw t2, 196(sp) # 4-byte Folded Spill
15099 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
15100 ; RV32ZVE32F-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
15101 ; RV32ZVE32F-NEXT: lw ra, 0(a4)
15102 ; RV32ZVE32F-NEXT: lw a1, 4(a4)
15103 ; RV32ZVE32F-NEXT: sw a1, 172(sp) # 4-byte Folded Spill
15104 ; RV32ZVE32F-NEXT: lw a1, 0(a5)
15105 ; RV32ZVE32F-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
15106 ; RV32ZVE32F-NEXT: lw a1, 4(a5)
15107 ; RV32ZVE32F-NEXT: sw a1, 164(sp) # 4-byte Folded Spill
15108 ; RV32ZVE32F-NEXT: lw a1, 0(a6)
15109 ; RV32ZVE32F-NEXT: sw a1, 252(sp) # 4-byte Folded Spill
15110 ; RV32ZVE32F-NEXT: lw a1, 4(a6)
15111 ; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
15112 ; RV32ZVE32F-NEXT: lw a1, 0(a7)
15113 ; RV32ZVE32F-NEXT: sw a1, 244(sp) # 4-byte Folded Spill
15114 ; RV32ZVE32F-NEXT: lw a1, 4(a7)
15115 ; RV32ZVE32F-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
15116 ; RV32ZVE32F-NEXT: lw a1, 0(t0)
15117 ; RV32ZVE32F-NEXT: sw a1, 188(sp) # 4-byte Folded Spill
15118 ; RV32ZVE32F-NEXT: lw a1, 4(t0)
15119 ; RV32ZVE32F-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
15120 ; RV32ZVE32F-NEXT: lw a1, 0(t1)
15121 ; RV32ZVE32F-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
15122 ; RV32ZVE32F-NEXT: lw a1, 4(t1)
15123 ; RV32ZVE32F-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
15124 ; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma
15125 ; RV32ZVE32F-NEXT: vse32.v v8, (a3)
15126 ; RV32ZVE32F-NEXT: lw a1, 288(sp)
15127 ; RV32ZVE32F-NEXT: lw a2, 292(sp)
15128 ; RV32ZVE32F-NEXT: lw a3, 296(sp)
15129 ; RV32ZVE32F-NEXT: lw a4, 300(sp)
15130 ; RV32ZVE32F-NEXT: lw a5, 0(a1)
15131 ; RV32ZVE32F-NEXT: sw a5, 228(sp) # 4-byte Folded Spill
15132 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
15133 ; RV32ZVE32F-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
15134 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
15135 ; RV32ZVE32F-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
15136 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
15137 ; RV32ZVE32F-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
15138 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
15139 ; RV32ZVE32F-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
15140 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
15141 ; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
15142 ; RV32ZVE32F-NEXT: lw a1, 0(a4)
15143 ; RV32ZVE32F-NEXT: sw a1, 204(sp) # 4-byte Folded Spill
15144 ; RV32ZVE32F-NEXT: lw a1, 4(a4)
15145 ; RV32ZVE32F-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
15146 ; RV32ZVE32F-NEXT: lw a1, 304(sp)
15147 ; RV32ZVE32F-NEXT: lw a2, 308(sp)
15148 ; RV32ZVE32F-NEXT: lw a3, 312(sp)
15149 ; RV32ZVE32F-NEXT: lw a4, 316(sp)
15150 ; RV32ZVE32F-NEXT: lw a5, 0(a1)
15151 ; RV32ZVE32F-NEXT: sw a5, 160(sp) # 4-byte Folded Spill
15152 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
15153 ; RV32ZVE32F-NEXT: sw a1, 156(sp) # 4-byte Folded Spill
15154 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
15155 ; RV32ZVE32F-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
15156 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
15157 ; RV32ZVE32F-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
15158 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
15159 ; RV32ZVE32F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
15160 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
15161 ; RV32ZVE32F-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
15162 ; RV32ZVE32F-NEXT: lw a1, 0(a4)
15163 ; RV32ZVE32F-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
15164 ; RV32ZVE32F-NEXT: lw a1, 4(a4)
15165 ; RV32ZVE32F-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
15166 ; RV32ZVE32F-NEXT: lw a1, 320(sp)
15167 ; RV32ZVE32F-NEXT: lw a2, 324(sp)
15168 ; RV32ZVE32F-NEXT: lw a3, 328(sp)
15169 ; RV32ZVE32F-NEXT: lw a4, 332(sp)
15170 ; RV32ZVE32F-NEXT: lw a5, 0(a1)
15171 ; RV32ZVE32F-NEXT: sw a5, 128(sp) # 4-byte Folded Spill
15172 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
15173 ; RV32ZVE32F-NEXT: sw a1, 124(sp) # 4-byte Folded Spill
15174 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
15175 ; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
15176 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
15177 ; RV32ZVE32F-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
15178 ; RV32ZVE32F-NEXT: lw s8, 0(a3)
15179 ; RV32ZVE32F-NEXT: lw s9, 4(a3)
15180 ; RV32ZVE32F-NEXT: lw s10, 0(a4)
15181 ; RV32ZVE32F-NEXT: lw s11, 4(a4)
15182 ; RV32ZVE32F-NEXT: lw a1, 336(sp)
15183 ; RV32ZVE32F-NEXT: lw a2, 340(sp)
15184 ; RV32ZVE32F-NEXT: lw a3, 344(sp)
15185 ; RV32ZVE32F-NEXT: lw a4, 348(sp)
15186 ; RV32ZVE32F-NEXT: lw t5, 0(a1)
15187 ; RV32ZVE32F-NEXT: lw t6, 4(a1)
15188 ; RV32ZVE32F-NEXT: lw s2, 0(a2)
15189 ; RV32ZVE32F-NEXT: lw s3, 4(a2)
15190 ; RV32ZVE32F-NEXT: lw a5, 0(a3)
15191 ; RV32ZVE32F-NEXT: lw a6, 4(a3)
15192 ; RV32ZVE32F-NEXT: lw a7, 0(a4)
15193 ; RV32ZVE32F-NEXT: lw t0, 4(a4)
15194 ; RV32ZVE32F-NEXT: lw a1, 352(sp)
15195 ; RV32ZVE32F-NEXT: lw a2, 356(sp)
15196 ; RV32ZVE32F-NEXT: lw a3, 360(sp)
15197 ; RV32ZVE32F-NEXT: lw a4, 364(sp)
15198 ; RV32ZVE32F-NEXT: lw t1, 0(a1)
15199 ; RV32ZVE32F-NEXT: sw t1, 112(sp) # 4-byte Folded Spill
15200 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
15201 ; RV32ZVE32F-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
15202 ; RV32ZVE32F-NEXT: lw a1, 0(a2)
15203 ; RV32ZVE32F-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
15204 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
15205 ; RV32ZVE32F-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
15206 ; RV32ZVE32F-NEXT: lw s4, 0(a3)
15207 ; RV32ZVE32F-NEXT: lw s5, 4(a3)
15208 ; RV32ZVE32F-NEXT: lw s6, 0(a4)
15209 ; RV32ZVE32F-NEXT: lw s7, 4(a4)
15210 ; RV32ZVE32F-NEXT: lw a1, 368(sp)
15211 ; RV32ZVE32F-NEXT: lw a2, 372(sp)
15212 ; RV32ZVE32F-NEXT: lw a3, 376(sp)
15213 ; RV32ZVE32F-NEXT: lw a4, 380(sp)
15214 ; RV32ZVE32F-NEXT: lw t1, 0(a1)
15215 ; RV32ZVE32F-NEXT: lw t2, 4(a1)
15216 ; RV32ZVE32F-NEXT: lw t3, 0(a2)
15217 ; RV32ZVE32F-NEXT: lw t4, 4(a2)
15218 ; RV32ZVE32F-NEXT: lw a1, 0(a3)
15219 ; RV32ZVE32F-NEXT: lw a2, 4(a3)
15220 ; RV32ZVE32F-NEXT: lw a3, 0(a4)
15221 ; RV32ZVE32F-NEXT: lw a4, 4(a4)
15222 ; RV32ZVE32F-NEXT: sw ra, 16(a0)
15223 ; RV32ZVE32F-NEXT: lw ra, 172(sp) # 4-byte Folded Reload
15224 ; RV32ZVE32F-NEXT: sw ra, 20(a0)
15225 ; RV32ZVE32F-NEXT: lw ra, 168(sp) # 4-byte Folded Reload
15226 ; RV32ZVE32F-NEXT: sw ra, 24(a0)
15227 ; RV32ZVE32F-NEXT: lw ra, 164(sp) # 4-byte Folded Reload
15228 ; RV32ZVE32F-NEXT: sw ra, 28(a0)
15229 ; RV32ZVE32F-NEXT: lw ra, 236(sp) # 4-byte Folded Reload
15230 ; RV32ZVE32F-NEXT: sw ra, 0(a0)
15231 ; RV32ZVE32F-NEXT: lw ra, 232(sp) # 4-byte Folded Reload
15232 ; RV32ZVE32F-NEXT: sw ra, 4(a0)
15233 ; RV32ZVE32F-NEXT: lw ra, 196(sp) # 4-byte Folded Reload
15234 ; RV32ZVE32F-NEXT: sw ra, 8(a0)
15235 ; RV32ZVE32F-NEXT: lw ra, 192(sp) # 4-byte Folded Reload
15236 ; RV32ZVE32F-NEXT: sw ra, 12(a0)
15237 ; RV32ZVE32F-NEXT: lw ra, 188(sp) # 4-byte Folded Reload
15238 ; RV32ZVE32F-NEXT: sw ra, 48(a0)
15239 ; RV32ZVE32F-NEXT: lw ra, 184(sp) # 4-byte Folded Reload
15240 ; RV32ZVE32F-NEXT: sw ra, 52(a0)
15241 ; RV32ZVE32F-NEXT: lw ra, 180(sp) # 4-byte Folded Reload
15242 ; RV32ZVE32F-NEXT: sw ra, 56(a0)
15243 ; RV32ZVE32F-NEXT: lw ra, 176(sp) # 4-byte Folded Reload
15244 ; RV32ZVE32F-NEXT: sw ra, 60(a0)
15245 ; RV32ZVE32F-NEXT: sw a5, 176(a0)
15246 ; RV32ZVE32F-NEXT: sw a6, 180(a0)
15247 ; RV32ZVE32F-NEXT: sw a7, 184(a0)
15248 ; RV32ZVE32F-NEXT: sw t0, 188(a0)
15249 ; RV32ZVE32F-NEXT: sw t5, 160(a0)
15250 ; RV32ZVE32F-NEXT: sw t6, 164(a0)
15251 ; RV32ZVE32F-NEXT: sw s2, 168(a0)
15252 ; RV32ZVE32F-NEXT: sw s3, 172(a0)
15253 ; RV32ZVE32F-NEXT: sw s8, 144(a0)
15254 ; RV32ZVE32F-NEXT: sw s9, 148(a0)
15255 ; RV32ZVE32F-NEXT: sw s10, 152(a0)
15256 ; RV32ZVE32F-NEXT: sw s11, 156(a0)
15257 ; RV32ZVE32F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
15258 ; RV32ZVE32F-NEXT: sw a5, 128(a0)
15259 ; RV32ZVE32F-NEXT: lw a5, 124(sp) # 4-byte Folded Reload
15260 ; RV32ZVE32F-NEXT: sw a5, 132(a0)
15261 ; RV32ZVE32F-NEXT: lw a5, 120(sp) # 4-byte Folded Reload
15262 ; RV32ZVE32F-NEXT: sw a5, 136(a0)
15263 ; RV32ZVE32F-NEXT: lw a5, 116(sp) # 4-byte Folded Reload
15264 ; RV32ZVE32F-NEXT: sw a5, 140(a0)
15265 ; RV32ZVE32F-NEXT: lw a5, 144(sp) # 4-byte Folded Reload
15266 ; RV32ZVE32F-NEXT: sw a5, 112(a0)
15267 ; RV32ZVE32F-NEXT: lw a5, 140(sp) # 4-byte Folded Reload
15268 ; RV32ZVE32F-NEXT: sw a5, 116(a0)
15269 ; RV32ZVE32F-NEXT: lw a5, 136(sp) # 4-byte Folded Reload
15270 ; RV32ZVE32F-NEXT: sw a5, 120(a0)
15271 ; RV32ZVE32F-NEXT: lw a5, 132(sp) # 4-byte Folded Reload
15272 ; RV32ZVE32F-NEXT: sw a5, 124(a0)
15273 ; RV32ZVE32F-NEXT: lw a5, 160(sp) # 4-byte Folded Reload
15274 ; RV32ZVE32F-NEXT: sw a5, 96(a0)
15275 ; RV32ZVE32F-NEXT: lw a5, 156(sp) # 4-byte Folded Reload
15276 ; RV32ZVE32F-NEXT: sw a5, 100(a0)
15277 ; RV32ZVE32F-NEXT: lw a5, 152(sp) # 4-byte Folded Reload
15278 ; RV32ZVE32F-NEXT: sw a5, 104(a0)
15279 ; RV32ZVE32F-NEXT: lw a5, 148(sp) # 4-byte Folded Reload
15280 ; RV32ZVE32F-NEXT: sw a5, 108(a0)
15281 ; RV32ZVE32F-NEXT: lw a5, 212(sp) # 4-byte Folded Reload
15282 ; RV32ZVE32F-NEXT: sw a5, 80(a0)
15283 ; RV32ZVE32F-NEXT: lw a5, 208(sp) # 4-byte Folded Reload
15284 ; RV32ZVE32F-NEXT: sw a5, 84(a0)
15285 ; RV32ZVE32F-NEXT: lw a5, 204(sp) # 4-byte Folded Reload
15286 ; RV32ZVE32F-NEXT: sw a5, 88(a0)
15287 ; RV32ZVE32F-NEXT: lw a5, 200(sp) # 4-byte Folded Reload
15288 ; RV32ZVE32F-NEXT: sw a5, 92(a0)
15289 ; RV32ZVE32F-NEXT: lw a5, 228(sp) # 4-byte Folded Reload
15290 ; RV32ZVE32F-NEXT: sw a5, 64(a0)
15291 ; RV32ZVE32F-NEXT: lw a5, 224(sp) # 4-byte Folded Reload
15292 ; RV32ZVE32F-NEXT: sw a5, 68(a0)
15293 ; RV32ZVE32F-NEXT: lw a5, 220(sp) # 4-byte Folded Reload
15294 ; RV32ZVE32F-NEXT: sw a5, 72(a0)
15295 ; RV32ZVE32F-NEXT: lw a5, 216(sp) # 4-byte Folded Reload
15296 ; RV32ZVE32F-NEXT: sw a5, 76(a0)
15297 ; RV32ZVE32F-NEXT: sw a1, 240(a0)
15298 ; RV32ZVE32F-NEXT: sw a2, 244(a0)
15299 ; RV32ZVE32F-NEXT: sw a3, 248(a0)
15300 ; RV32ZVE32F-NEXT: sw a4, 252(a0)
15301 ; RV32ZVE32F-NEXT: sw t1, 224(a0)
15302 ; RV32ZVE32F-NEXT: sw t2, 228(a0)
15303 ; RV32ZVE32F-NEXT: sw t3, 232(a0)
15304 ; RV32ZVE32F-NEXT: sw t4, 236(a0)
15305 ; RV32ZVE32F-NEXT: sw s4, 208(a0)
15306 ; RV32ZVE32F-NEXT: sw s5, 212(a0)
15307 ; RV32ZVE32F-NEXT: sw s6, 216(a0)
15308 ; RV32ZVE32F-NEXT: sw s7, 220(a0)
15309 ; RV32ZVE32F-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
15310 ; RV32ZVE32F-NEXT: sw a1, 192(a0)
15311 ; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
15312 ; RV32ZVE32F-NEXT: sw a1, 196(a0)
15313 ; RV32ZVE32F-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
15314 ; RV32ZVE32F-NEXT: sw a1, 200(a0)
15315 ; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
15316 ; RV32ZVE32F-NEXT: sw a1, 204(a0)
15317 ; RV32ZVE32F-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
15318 ; RV32ZVE32F-NEXT: sw a1, 32(a0)
15319 ; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
15320 ; RV32ZVE32F-NEXT: sw a1, 36(a0)
15321 ; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
15322 ; RV32ZVE32F-NEXT: sw a1, 40(a0)
15323 ; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
15324 ; RV32ZVE32F-NEXT: sw a1, 44(a0)
15325 ; RV32ZVE32F-NEXT: addi sp, s0, -512
15326 ; RV32ZVE32F-NEXT: .cfi_def_cfa sp, 512
15327 ; RV32ZVE32F-NEXT: lw ra, 508(sp) # 4-byte Folded Reload
15328 ; RV32ZVE32F-NEXT: lw s0, 504(sp) # 4-byte Folded Reload
15329 ; RV32ZVE32F-NEXT: lw s2, 500(sp) # 4-byte Folded Reload
15330 ; RV32ZVE32F-NEXT: lw s3, 496(sp) # 4-byte Folded Reload
15331 ; RV32ZVE32F-NEXT: lw s4, 492(sp) # 4-byte Folded Reload
15332 ; RV32ZVE32F-NEXT: lw s5, 488(sp) # 4-byte Folded Reload
15333 ; RV32ZVE32F-NEXT: lw s6, 484(sp) # 4-byte Folded Reload
15334 ; RV32ZVE32F-NEXT: lw s7, 480(sp) # 4-byte Folded Reload
15335 ; RV32ZVE32F-NEXT: lw s8, 476(sp) # 4-byte Folded Reload
15336 ; RV32ZVE32F-NEXT: lw s9, 472(sp) # 4-byte Folded Reload
15337 ; RV32ZVE32F-NEXT: lw s10, 468(sp) # 4-byte Folded Reload
15338 ; RV32ZVE32F-NEXT: lw s11, 464(sp) # 4-byte Folded Reload
15339 ; RV32ZVE32F-NEXT: .cfi_restore ra
15340 ; RV32ZVE32F-NEXT: .cfi_restore s0
15341 ; RV32ZVE32F-NEXT: .cfi_restore s2
15342 ; RV32ZVE32F-NEXT: .cfi_restore s3
15343 ; RV32ZVE32F-NEXT: .cfi_restore s4
15344 ; RV32ZVE32F-NEXT: .cfi_restore s5
15345 ; RV32ZVE32F-NEXT: .cfi_restore s6
15346 ; RV32ZVE32F-NEXT: .cfi_restore s7
15347 ; RV32ZVE32F-NEXT: .cfi_restore s8
15348 ; RV32ZVE32F-NEXT: .cfi_restore s9
15349 ; RV32ZVE32F-NEXT: .cfi_restore s10
15350 ; RV32ZVE32F-NEXT: .cfi_restore s11
15351 ; RV32ZVE32F-NEXT: addi sp, sp, 512
15352 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
15353 ; RV32ZVE32F-NEXT: ret
15355 ; RV64ZVE32F-LABEL: mgather_strided_split:
15356 ; RV64ZVE32F: # %bb.0:
15357 ; RV64ZVE32F-NEXT: addi sp, sp, -144
15358 ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 144
15359 ; RV64ZVE32F-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
15360 ; RV64ZVE32F-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
15361 ; RV64ZVE32F-NEXT: sd s1, 120(sp) # 8-byte Folded Spill
15362 ; RV64ZVE32F-NEXT: sd s2, 112(sp) # 8-byte Folded Spill
15363 ; RV64ZVE32F-NEXT: sd s3, 104(sp) # 8-byte Folded Spill
15364 ; RV64ZVE32F-NEXT: sd s4, 96(sp) # 8-byte Folded Spill
15365 ; RV64ZVE32F-NEXT: sd s5, 88(sp) # 8-byte Folded Spill
15366 ; RV64ZVE32F-NEXT: sd s6, 80(sp) # 8-byte Folded Spill
15367 ; RV64ZVE32F-NEXT: sd s7, 72(sp) # 8-byte Folded Spill
15368 ; RV64ZVE32F-NEXT: sd s8, 64(sp) # 8-byte Folded Spill
15369 ; RV64ZVE32F-NEXT: sd s9, 56(sp) # 8-byte Folded Spill
15370 ; RV64ZVE32F-NEXT: sd s10, 48(sp) # 8-byte Folded Spill
15371 ; RV64ZVE32F-NEXT: sd s11, 40(sp) # 8-byte Folded Spill
15372 ; RV64ZVE32F-NEXT: .cfi_offset ra, -8
15373 ; RV64ZVE32F-NEXT: .cfi_offset s0, -16
15374 ; RV64ZVE32F-NEXT: .cfi_offset s1, -24
15375 ; RV64ZVE32F-NEXT: .cfi_offset s2, -32
15376 ; RV64ZVE32F-NEXT: .cfi_offset s3, -40
15377 ; RV64ZVE32F-NEXT: .cfi_offset s4, -48
15378 ; RV64ZVE32F-NEXT: .cfi_offset s5, -56
15379 ; RV64ZVE32F-NEXT: .cfi_offset s6, -64
15380 ; RV64ZVE32F-NEXT: .cfi_offset s7, -72
15381 ; RV64ZVE32F-NEXT: .cfi_offset s8, -80
15382 ; RV64ZVE32F-NEXT: .cfi_offset s9, -88
15383 ; RV64ZVE32F-NEXT: .cfi_offset s10, -96
15384 ; RV64ZVE32F-NEXT: .cfi_offset s11, -104
15385 ; RV64ZVE32F-NEXT: ld a2, 0(a1)
15386 ; RV64ZVE32F-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
15387 ; RV64ZVE32F-NEXT: ld a2, 16(a1)
15388 ; RV64ZVE32F-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
15389 ; RV64ZVE32F-NEXT: ld a2, 32(a1)
15390 ; RV64ZVE32F-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
15391 ; RV64ZVE32F-NEXT: ld a2, 48(a1)
15392 ; RV64ZVE32F-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
15393 ; RV64ZVE32F-NEXT: ld a2, 64(a1)
15394 ; RV64ZVE32F-NEXT: sd a2, 0(sp) # 8-byte Folded Spill
15395 ; RV64ZVE32F-NEXT: ld a7, 80(a1)
15396 ; RV64ZVE32F-NEXT: ld t0, 96(a1)
15397 ; RV64ZVE32F-NEXT: ld t1, 112(a1)
15398 ; RV64ZVE32F-NEXT: ld t2, 128(a1)
15399 ; RV64ZVE32F-NEXT: ld t3, 144(a1)
15400 ; RV64ZVE32F-NEXT: ld t4, 160(a1)
15401 ; RV64ZVE32F-NEXT: ld t5, 176(a1)
15402 ; RV64ZVE32F-NEXT: ld t6, 192(a1)
15403 ; RV64ZVE32F-NEXT: ld s0, 208(a1)
15404 ; RV64ZVE32F-NEXT: ld s1, 224(a1)
15405 ; RV64ZVE32F-NEXT: ld s2, 240(a1)
15406 ; RV64ZVE32F-NEXT: ld s3, 256(a1)
15407 ; RV64ZVE32F-NEXT: ld s4, 272(a1)
15408 ; RV64ZVE32F-NEXT: ld s5, 288(a1)
15409 ; RV64ZVE32F-NEXT: ld s6, 304(a1)
15410 ; RV64ZVE32F-NEXT: ld s7, 320(a1)
15411 ; RV64ZVE32F-NEXT: ld s8, 336(a1)
15412 ; RV64ZVE32F-NEXT: ld s9, 352(a1)
15413 ; RV64ZVE32F-NEXT: ld s10, 368(a1)
15414 ; RV64ZVE32F-NEXT: ld s11, 384(a1)
15415 ; RV64ZVE32F-NEXT: ld ra, 400(a1)
15416 ; RV64ZVE32F-NEXT: ld a6, 416(a1)
15417 ; RV64ZVE32F-NEXT: ld a5, 432(a1)
15418 ; RV64ZVE32F-NEXT: ld a2, 448(a1)
15419 ; RV64ZVE32F-NEXT: ld a3, 464(a1)
15420 ; RV64ZVE32F-NEXT: ld a4, 480(a1)
15421 ; RV64ZVE32F-NEXT: ld a1, 496(a1)
15422 ; RV64ZVE32F-NEXT: sd a2, 224(a0)
15423 ; RV64ZVE32F-NEXT: sd a3, 232(a0)
15424 ; RV64ZVE32F-NEXT: sd a4, 240(a0)
15425 ; RV64ZVE32F-NEXT: sd a1, 248(a0)
15426 ; RV64ZVE32F-NEXT: sd s11, 192(a0)
15427 ; RV64ZVE32F-NEXT: sd ra, 200(a0)
15428 ; RV64ZVE32F-NEXT: sd a6, 208(a0)
15429 ; RV64ZVE32F-NEXT: sd a5, 216(a0)
15430 ; RV64ZVE32F-NEXT: sd s7, 160(a0)
15431 ; RV64ZVE32F-NEXT: sd s8, 168(a0)
15432 ; RV64ZVE32F-NEXT: sd s9, 176(a0)
15433 ; RV64ZVE32F-NEXT: sd s10, 184(a0)
15434 ; RV64ZVE32F-NEXT: sd s3, 128(a0)
15435 ; RV64ZVE32F-NEXT: sd s4, 136(a0)
15436 ; RV64ZVE32F-NEXT: sd s5, 144(a0)
15437 ; RV64ZVE32F-NEXT: sd s6, 152(a0)
15438 ; RV64ZVE32F-NEXT: sd t6, 96(a0)
15439 ; RV64ZVE32F-NEXT: sd s0, 104(a0)
15440 ; RV64ZVE32F-NEXT: sd s1, 112(a0)
15441 ; RV64ZVE32F-NEXT: sd s2, 120(a0)
15442 ; RV64ZVE32F-NEXT: sd t2, 64(a0)
15443 ; RV64ZVE32F-NEXT: sd t3, 72(a0)
15444 ; RV64ZVE32F-NEXT: sd t4, 80(a0)
15445 ; RV64ZVE32F-NEXT: sd t5, 88(a0)
15446 ; RV64ZVE32F-NEXT: ld a1, 0(sp) # 8-byte Folded Reload
15447 ; RV64ZVE32F-NEXT: sd a1, 32(a0)
15448 ; RV64ZVE32F-NEXT: sd a7, 40(a0)
15449 ; RV64ZVE32F-NEXT: sd t0, 48(a0)
15450 ; RV64ZVE32F-NEXT: sd t1, 56(a0)
15451 ; RV64ZVE32F-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
15452 ; RV64ZVE32F-NEXT: sd a1, 0(a0)
15453 ; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
15454 ; RV64ZVE32F-NEXT: sd a1, 8(a0)
15455 ; RV64ZVE32F-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
15456 ; RV64ZVE32F-NEXT: sd a1, 16(a0)
15457 ; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
15458 ; RV64ZVE32F-NEXT: sd a1, 24(a0)
15459 ; RV64ZVE32F-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
15460 ; RV64ZVE32F-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
15461 ; RV64ZVE32F-NEXT: ld s1, 120(sp) # 8-byte Folded Reload
15462 ; RV64ZVE32F-NEXT: ld s2, 112(sp) # 8-byte Folded Reload
15463 ; RV64ZVE32F-NEXT: ld s3, 104(sp) # 8-byte Folded Reload
15464 ; RV64ZVE32F-NEXT: ld s4, 96(sp) # 8-byte Folded Reload
15465 ; RV64ZVE32F-NEXT: ld s5, 88(sp) # 8-byte Folded Reload
15466 ; RV64ZVE32F-NEXT: ld s6, 80(sp) # 8-byte Folded Reload
15467 ; RV64ZVE32F-NEXT: ld s7, 72(sp) # 8-byte Folded Reload
15468 ; RV64ZVE32F-NEXT: ld s8, 64(sp) # 8-byte Folded Reload
15469 ; RV64ZVE32F-NEXT: ld s9, 56(sp) # 8-byte Folded Reload
15470 ; RV64ZVE32F-NEXT: ld s10, 48(sp) # 8-byte Folded Reload
15471 ; RV64ZVE32F-NEXT: ld s11, 40(sp) # 8-byte Folded Reload
15472 ; RV64ZVE32F-NEXT: .cfi_restore ra
15473 ; RV64ZVE32F-NEXT: .cfi_restore s0
15474 ; RV64ZVE32F-NEXT: .cfi_restore s1
15475 ; RV64ZVE32F-NEXT: .cfi_restore s2
15476 ; RV64ZVE32F-NEXT: .cfi_restore s3
15477 ; RV64ZVE32F-NEXT: .cfi_restore s4
15478 ; RV64ZVE32F-NEXT: .cfi_restore s5
15479 ; RV64ZVE32F-NEXT: .cfi_restore s6
15480 ; RV64ZVE32F-NEXT: .cfi_restore s7
15481 ; RV64ZVE32F-NEXT: .cfi_restore s8
15482 ; RV64ZVE32F-NEXT: .cfi_restore s9
15483 ; RV64ZVE32F-NEXT: .cfi_restore s10
15484 ; RV64ZVE32F-NEXT: .cfi_restore s11
15485 ; RV64ZVE32F-NEXT: addi sp, sp, 144
15486 ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0
15487 ; RV64ZVE32F-NEXT: ret
15488 %ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38, i64 40, i64 42, i64 44, i64 46, i64 48, i64 50, i64 52, i64 54, i64 56, i64 58, i64 60, i64 62>
15489 %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> splat (i1 true), <32 x i64> poison)
15490 ret <32 x i64> %x
15491 }
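; On RV32V and RV64V, widening two consecutive i32 elements into a single e64
; access also works when the resulting stride is negative (-136 bytes here).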
15493 define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
15494 ; RV32V-LABEL: masked_gather_widen_sew_negative_stride:
15496 ; RV32V-NEXT: addi a0, a0, 136
15497 ; RV32V-NEXT: li a1, -136
15498 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
15499 ; RV32V-NEXT: vlse64.v v8, (a0), a1
15502 ; RV64V-LABEL: masked_gather_widen_sew_negative_stride:
15504 ; RV64V-NEXT: addi a0, a0, 136
15505 ; RV64V-NEXT: li a1, -136
15506 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
15507 ; RV64V-NEXT: vlse64.v v8, (a0), a1
15510 ; RV32ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
15511 ; RV32ZVE32F: # %bb.0:
15512 ; RV32ZVE32F-NEXT: lui a1, 16393
15513 ; RV32ZVE32F-NEXT: addi a1, a1, -888
15514 ; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
15515 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
15516 ; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9
15517 ; RV32ZVE32F-NEXT: ret
15519 ; RV64ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
15520 ; RV64ZVE32F: # %bb.0:
15521 ; RV64ZVE32F-NEXT: lw a1, 136(a0)
15522 ; RV64ZVE32F-NEXT: lw a2, 140(a0)
15523 ; RV64ZVE32F-NEXT: lw a3, 0(a0)
15524 ; RV64ZVE32F-NEXT: lw a0, 4(a0)
15525 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
15526 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1
15527 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
15528 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
15529 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
15530 ; RV64ZVE32F-NEXT: ret
15531 %ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 34, i64 35, i64 0, i64 1>
15532 %x = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 true), <4 x i32> poison)
15533 ret <4 x i32> %x
15534 }
15536 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
15537 ; RV32V-ZVFH: {{.*}}
15538 ; RV32V-ZVFHMIN: {{.*}}
15539 ; RV32ZVE32F-ZVFH: {{.*}}
15540 ; RV32ZVE32F-ZVFHMIN: {{.*}}
15542 ; RV64V-ZVFH: {{.*}}
15543 ; RV64V-ZVFHMIN: {{.*}}